Exemple #1
0
        /// <summary>
        ///  Read the sanitize spec CSV at <c>SpecFilePath</c> and apply one directive per row.
        ///  Column 0 of each row is the command (matched case-insensitively) and the
        ///  remaining columns are its arguments:
        ///  <list type="bullet">
        ///   <item><description><c>Sample,ColumnName,Probability</c> - sets SampleColumnName and SampleProbability.</description></item>
        ///   <item><description><c>Echo,Value1,Value2,...</c> - adds every value from column 1 onward to EchoValues.</description></item>
        ///   <item><description><c>Drop,ColumnName</c> - adds the column to DropColumns.</description></item>
        ///   <item><description><c>Map,ColumnName,MapperName</c> - registers an echo-wrapped map handler for the column.</description></item>
        ///   <item><description><c>Keep,ColumnName</c> - no action; keep is the default.</description></item>
        ///   <item><description><c>Regex,ColumnName,Expression,MapperName</c> - registers an echo-wrapped regex handler for the column.</description></item>
        ///  </list>
        /// </summary>
        /// <exception cref="UsageException">
        ///  The sample probability is not a number between zero and one, or the command is not recognized.
        /// </exception>
        public void LoadSpec()
        {
            using (ITabularReader r = new CsvReader(this.SpecFilePath, false))
            {
                while (r.NextRow())
                {
                    string command = r.Current(0).ToString();

                    // NOTE(review): column 1 is read for every row, before the command is known.
                    // For "echo" rows it is the first echoed value rather than a column name.
                    string columnName = r.Current(1).ToString();

                    switch (command.ToLowerInvariant())
                    {
                        case "sample":
                            // Sample,ColumnName,Probability
                            this.SampleColumnName = columnName;

                            // The spec is a machine-readable file, so the probability is parsed with the
                            // invariant culture; a culture-sensitive parse misreads "0.5" on machines
                            // whose culture uses ',' as the decimal separator.
                            this.SampleProbability = double.Parse(r.Current(2).ToString(), System.Globalization.CultureInfo.InvariantCulture);

                            // Written as a negated in-range test so that NaN (for which every
                            // comparison is false) is rejected along with out-of-range values.
                            if (!(this.SampleProbability >= 0.0 && this.SampleProbability <= 1.0))
                            {
                                throw new UsageException($"SanitizeSpec sample probability ({r.Current(2)}) is out of range. It must be between zero and one.");
                            }

                            break;

                        case "echo":
                            // Echo,Value1,Value2,...
                            for (int i = 1; i < r.CurrentRowColumns; ++i)
                            {
                                this.EchoValues.Add(r.Current(i).ToString8());
                            }

                            break;

                        case "drop":
                            // Drop,ColumnName
                            this.DropColumns.Add(columnName);
                            break;

                        case "map":
                            // Map,ColumnName,MapperName
                            this.HandlersByColumn.Add(columnName, new EchoColumnHandler(this.EchoValues, new MapColumnHandler(this.HashKeyHash, this.Provider.Mapper(r.Current(2).ToString()))));
                            break;

                        case "keep":
                            // Keep is the default behavior, so it is in the sanispec only as a comment
                            break;

                        case "regex":
                            // Regex,ColumnName,Expression,MapperName
                            MapColumnHandler handler = new MapColumnHandler(this.HashKeyHash, this.Provider.Mapper(r.Current(3).ToString()));
                            this.HandlersByColumn.Add(columnName, new EchoColumnHandler(this.EchoValues, new RegexColumnHandler(r.Current(2).ToString(), handler)));
                            break;

                        default:
                            throw new UsageException($"SanitizeSpec mode '{command}' is unknown. Supported modes: sample, echo, drop, map, keep, regex.");
                    }
                }
            }
        }
        /// <summary>
        ///  Read every row of the CSV in <paramref name="input"/> and convert each one with ParseResult.
        ///  The reader is stored in the _parser field while rows are read and is disposed before returning.
        /// </summary>
        /// <param name="input">Stream containing the CSV content to read.</param>
        /// <returns>One Result per row, in the order the rows were read.</returns>
        private List<Result> GetResultsFromStream(Stream input)
        {
            List<Result> parsedRows = new List<Result>();

            _parser = new CsvReader(input);
            using (_parser)
            {
                // Advance one row at a time; NextRow() returns false once the input is exhausted.
                for (bool hasRow = _parser.NextRow(); hasRow; hasRow = _parser.NextRow())
                {
                    Result rowResult = ParseResult(_parser.Current());
                    parsedRows.Add(rowResult);
                }
            }

            return parsedRows;
        }
Exemple #3
0
        /// <summary>
        ///  Exercises CsvReader row splitting with a deliberately small (64 byte) buffer:
        ///  empty input, newline handling, buffer growth, buffer-boundary newlines, and quoting.
        /// </summary>
        public void CsvReader_Basics()
        {
            // Small on purpose, so modest inputs force buffer refills and resizes.
            const int bufferSize = 64;

            // Renders the reader's current row as pipe-delimited text so a whole row is checked in one assertion.
            System.Func<CsvReader, string> rowText = source => string.Join("|", source.Current());

            // No content at all: there must be no rows.
            using (CsvReader csv = new CsvReader(StreamFromString(""), bufferSize))
            {
                Assert.False(csv.NextRow());

                // Explicit Dispose here plus the one from 'using' - a second dispose must be harmless.
                csv.Dispose();
            }

            // One row without a terminating newline; RowCountRead tracks rows consumed so far.
            string singleRow = "One,Two,Three";

            using (CsvReader csv = new CsvReader(StreamFromString(singleRow), bufferSize))
            {
                Assert.Equal(0, csv.RowCountRead);

                Assert.True(csv.NextRow());
                Assert.Equal(1, csv.RowCountRead);
                Assert.Equal("One|Two|Three", rowText(csv));

                Assert.False(csv.NextRow());
                Assert.Equal(1, csv.RowCountRead);
            }

            // Leading, interior and trailing empty cells are all preserved.
            string rowWithEmptyCells = ",Value,,";

            using (CsvReader csv = new CsvReader(StreamFromString(rowWithEmptyCells), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal("|Value||", rowText(csv));
                Assert.False(csv.NextRow());
            }

            // Mixed '\n' and '\r\n' terminators; the trailing newline must not yield an extra row.
            string mixedNewlines = "One\nTwo\r\nThree\r\n";

            using (CsvReader csv = new CsvReader(StreamFromString(mixedNewlines), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal("One", rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Equal("Two", rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Equal("Three", rowText(csv));

                Assert.False(csv.NextRow());
            }

            // A single row wider than the buffer: every one of the 100 cells must survive the resize.
            var hundredNumbers = Enumerable.Range(100, 100).Select(n => n.ToString());
            string wideRow = string.Join(",", hundredNumbers);

            using (CsvReader csv = new CsvReader(StreamFromString(wideRow), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal(100, csv.Current().Count);
                Assert.False(csv.NextRow());
            }

            // A single value exactly twice the buffer size, so reading it needs two resizes.
            string doubleBufferValue = new string('0', 128);
            string rowsThenLongValue = $"One,Two,Three\r\nSecond,Row\r\n{doubleBufferValue}";

            using (CsvReader csv = new CsvReader(StreamFromString(rowsThenLongValue), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal("One|Two|Three", rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Equal("Second|Row", rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Single(csv.Current());
                Assert.Equal(doubleBufferValue, csv.Current()[0]);
            }

            // 63 characters put the '\r' on the last buffer byte; the '\n' arrives with the next
            // refill and must still be treated as part of the same line ending.
            string sixtyThreeZeros = new string('0', 63);
            string newlineAcrossBoundary = $"{sixtyThreeZeros}\r\nNextRow\r\n";

            using (CsvReader csv = new CsvReader(StreamFromString(newlineAcrossBoundary), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal(sixtyThreeZeros, rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Equal("NextRow", rowText(csv));

                Assert.False(csv.NextRow());
            }

            // Quoting: empty quoted cell, escaped quotes at both ends, an escaped quote mid-value,
            // a plain cell following quoted ones, and back-to-back escaped quotes.
            string quotedVariations = "\"\",\"\"\"Around\"\"\",\"With\"\"in\",None,\"Many\"\"\"\"\"\nNextRow";

            using (CsvReader csv = new CsvReader(StreamFromString(quotedVariations), bufferSize))
            {
                Assert.True(csv.NextRow());
                Assert.Equal("|\"Around\"|With\"in|None|Many\"\"", rowText(csv));

                Assert.True(csv.NextRow());
                Assert.Equal("NextRow", rowText(csv));

                Assert.False(csv.NextRow());
            }

            // A lone quote inside a quoted cell that does not end the cell is malformed input.
            string unescapedQuote = "\"Unescaped\"Quote,";

            using (CsvReader csv = new CsvReader(StreamFromString(unescapedQuote), bufferSize))
            {
                Assert.Throws<IOException>(() => csv.NextRow());
            }
        }