Ejemplo n.º 1
0
        // Checks that, with HasHeaders enabled, the first CSV line is exposed through
        // ColumnNames while the two remaining lines come back as data rows.
        public void ParseWithHeadersHeaders()
        {
            string content = @"
                             First,Second,Third
                             A,B,C
                             D,E,F";

            var settings = new CsvReaderSettings();
            settings.HasHeaders = true;

            using (var reader = GetTestReader(content, settings))
            {
                Assert.AreEqual(3, reader.ColumnCount);

                var rows = reader.Rows.ToArray();
                Assert.AreEqual(2, rows.Length);

                // Expected cell text, one inner array per data row.
                string[][] expectedCells =
                {
                    new[] { "A", "B", "C" },
                    new[] { "D", "E", "F" }
                };

                for (int r = 0; r < expectedCells.Length; r++)
                {
                    for (int c = 0; c < expectedCells[r].Length; c++)
                    {
                        Assert.AreEqual(expectedCells[r][c], rows[r][c].ToString());
                    }
                }

                // The header line must surface as the column names, in order.
                string[] expectedNames = { "First", "Second", "Third" };
                for (int i = 0; i < expectedNames.Length; i++)
                {
                    Assert.AreEqual(expectedNames[i], reader.ColumnNames[i]);
                }
            }
        }
Ejemplo n.º 2
0
        // Parses the CSV content opened from 'readable' into a matrix of doubles.
        // The matrix has one row per parsed record and as many columns as the longest record.
        // Cells that are missing, or whose text does not parse as a number under the
        // invariant culture, are filled with 0.
        private static M <double> Transform(IReadable readable, CsvReaderSettings csvReaderSettings)
        {
            using (var textSource = new StreamReader(readable.Open()))
            {
                var parser      = new CsvReader(csvReaderSettings);
                var records     = parser.Read(textSource).ToArray();
                var recordCount = records.Length;

                // Width of the widest record; stays 0 when nothing was parsed.
                int widest = 0;
                foreach (var record in records)
                {
                    if (record.Length > widest)
                    {
                        widest = record.Length;
                    }
                }

                return M.Generate <double>(recordCount, widest, (row, col) =>
                {
                    // Short records are padded: positions past their end yield 0.
                    bool inRange = row < recordCount && col < records[row].Length;

                    double parsed;
                    if (inRange && double.TryParse(records[row][col], NumberStyles.Any, CultureInfo.InvariantCulture, out parsed))
                    {
                        return parsed;
                    }
                    return 0;
                });
            }
        }
Ejemplo n.º 3
0
        // Checks that fully quoted fields are returned without their surrounding quotes
        // when the input has no header line.
        public void ParseQuoted()
        {
            string content = @"
                             ""A"",""B"",""C""
                             ""D"",""E"",""F""";

            var settings = new CsvReaderSettings();
            settings.HasHeaders = false;

            using (var reader = GetTestReader(content, settings))
            {
                Assert.AreEqual(3, reader.ColumnCount);

                var rows = reader.Rows.ToArray();
                Assert.AreEqual(2, rows.Length);

                // Expected unquoted cell text, one inner array per row.
                string[][] expectedCells =
                {
                    new[] { "A", "B", "C" },
                    new[] { "D", "E", "F" }
                };

                for (int r = 0; r < expectedCells.Length; r++)
                {
                    for (int c = 0; c < expectedCells[r].Length; c++)
                    {
                        Assert.AreEqual(expectedCells[r][c], rows[r][c].ToString());
                    }
                }
            }
        }
Ejemplo n.º 4
0
        // Entry point that reads CSV content from 'readable' into a matrix of doubles.
        // Each pin value is copied into a CsvReaderSettings instance, which is then handed
        // to Transform together with the readable. Every character of 'delimiter' becomes
        // a separate delimiter character.
        public static M <double> Read(
            [InputPin(Name = "Readable", Description = "readable", PropertyMode = PropertyMode.Allow)]
            IReadable readable,
            [InputPin(Name = "Delimiter", Description = "Delimiter", PropertyMode = PropertyMode.Default, Editor = WellKnownEditors.SingleLineText)]
            string delimiter = ",",
            [InputPin(Name = "SkipEmptyLines", Description = "Should empty lines be skipped?", PropertyMode = PropertyMode.Default)]
            bool skipEmptyLines = true,
            [InputPin(Name = "SkipFirstLine", Description = "Should the first line be skipped?", PropertyMode = PropertyMode.Default)]
            bool skipFirstLine = false,
            [InputPin(Name = "MaxTokenLength", Description = "Maximal length of a token.", PropertyMode = PropertyMode.Default)]
            int? maxTokenLength = 1024 * 1024,
            [InputPin(Name = "StartOfComment", Description = "Start of a comment line", PropertyMode = PropertyMode.Default, Editor = WellKnownEditors.SingleLineText)]
            string startOfComment = "#"
            )
        {
            var readerSettings = new CsvReaderSettings();

            readerSettings.Delimiters     = delimiter.ToCharArray();
            readerSettings.SkipEmptyLines = skipEmptyLines;
            readerSettings.SkipFirstLine  = skipFirstLine;
            readerSettings.MaxTokenLength = maxTokenLength;
            readerSettings.StartOfComment = startOfComment;

            var matrix = Transform(readable, readerSettings);
            return matrix;
        }
Ejemplo n.º 5
0
        // Checks that enabling IncludeRowNumberAsColumn prepends a column named after
        // RowNumberColumnName, so a three-column file is reported with four columns and
        // the first data row starts with "1".
        public void ParseWithRowNumberAndHeaders()
        {
            // One header line (C1,C2,C3) followed by a single data line (A,B,C).
            string content = "C1,C2,C3\r\nA,B,C";

            var settings = new CsvReaderSettings();
            settings.HasHeaders = true;
            settings.IncludeRowNumberAsColumn = true;
            settings.RowNumberColumnName      = "RowNumber";

            using (var reader = GetTestReader(content, settings))
            {
                Assert.AreEqual(4, reader.ColumnCount);

                var rows = reader.Rows.ToArray();

                Assert.AreEqual("RowNumber", reader.ColumnNames[0]);

                // Row number first, then the original three cells.
                string[] expectedFirstRow = { "1", "A", "B", "C" };
                for (int c = 0; c < expectedFirstRow.Length; c++)
                {
                    Assert.AreEqual(expectedFirstRow[c], rows[0][c].ToString());
                }
            }
        }
Ejemplo n.º 6
0
        // Reads each listed CSV file through the generic CsvReader<Student>, counts the
        // records produced, and prints the count together with the elapsed time in
        // milliseconds.
        static void TestGenericReader()
        {
            var csvFiles = new String[] {
                @"../TestData.Csv/csv-students.csv",
            };
            var csvReaderSettings = new CsvReaderSettings();

            csvReaderSettings.Encoding = System.Text.Encoding.UTF8;
            var dataResolver = new StudentResolver();

            foreach (var csvFile in csvFiles)
            {
                var recordCount = 0;
                var csvFilePath = ProcessCsvFile(csvFile);

                // Stopwatch is monotonic, unlike DateTime.Now, which can jump with clock adjustments.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                using (var reader = CsvReader <Student> .Create(csvFilePath, csvReaderSettings, dataResolver))
                {
                    // The records themselves are not inspected; only their number matters here.
                    foreach (var student in reader)
                    {
                        ++recordCount;
                    }
                }
                stopwatch.Stop();

                // The output label says "ms", so report milliseconds (the value used to be seconds).
                var elapsedMilliseconds = stopwatch.Elapsed.TotalMilliseconds;

                Console.WriteLine("Count: {0}\tTime: {1}ms", recordCount, elapsedMilliseconds);
                Console.WriteLine("======================");
            }
        }
Ejemplo n.º 7
0
        // Reads an inline CSV sample with "AsNeeded" text qualification and a "NULL" null
        // marker, re-renders every parsed line (null cells as NULL, everything else quoted)
        // and compares the rendering with the expected text.
        public void TestRead()
        {
            string expectedResult = @"""1"",""2"",""3"",NULL
""a1"",""b,c"",""NULL"",""d1""
"""","""","""",""""
""x1"",NULL,""y1"",""z1""
"""","""","""",""""
""fghi"",""k
lmop"",""qrsu"",""vwxy""
"""","""","""",""""
"""","""","""",""""";

            string csv = @"1,2,3,NULL
a1,""b,c"",""NULL"",d1
"""","""","""",""""
x1,NULL,y1,""z1""
,,,
""fghi"",""k
lmop"",""qrsu"",""vwxy""
,,,
,,,";

            var settings = new CsvReaderSettings()
            {
                ColumnSeparator   = ',',
                Encoding          = Encoding.UTF8,
                NullValue         = "NULL",
                TextQualification = CsvTextQualification.AsNeeded,
                TextQualifier     = '"'
            };

            var stream = new MemoryStream(settings.Encoding.GetBytes(csv));

            var result = new StringBuilder();

            using (var reader = new CsvReader(stream, settings))
            {
                string[] line = null;
                while ((line = reader.Read()) != null)
                {
                    // Render null cells as the bare NULL marker and every other cell in quotes.
                    result.AppendLine(string.Join(",", line.Select(c => c == null ? "NULL" : string.Format("\"{0}\"", c))));
                }
            }

            // Echo both texts so a failing run can be diagnosed from the test output.
            Console.WriteLine("Expects:");
            Console.WriteLine(expectedResult);
            Console.WriteLine();
            Console.WriteLine("Result:");
            Console.WriteLine(result);

            // Expected value goes first so failure messages label the two sides correctly.
            // Trim drops the trailing line break added by the last AppendLine.
            Assert.AreEqual(expectedResult, result.ToString().Trim());
        }
Ejemplo n.º 8
0
        // Parses the text stored under this node's value property as comma-separated
        // numbers and returns the result of Transform as the only output, wrapped in an
        // already-completed task. The 'inputs' and 'cancel' arguments are not used.
        protected override Task <object[]> EvaluateInternal(object[] inputs, CancellationToken cancel)
        {
            var csvSettings = new CsvReaderSettings();
            csvSettings.Delimiters     = new char[] { ',' };
            csvSettings.SkipEmptyLines = false;
            csvSettings.SkipFirstLine  = false;
            csvSettings.MaxTokenLength = 1024 * 1024;
            csvSettings.StartOfComment = "";

            var text = properties.Get <string>(this.value.Id);
            object[] outputs = { Transform(text, csvSettings) };

            return Task.FromResult(outputs);
        }
Ejemplo n.º 9
0
        // Handles a CSV import request: validates the Content-Type header and body,
        // resolves the target table from the route, then streams the request body into the
        // table batch by batch. Responds with the column names and the total number of
        // rows read, or with a bad-request response when validation fails.
        private IResponse CsvAppend(IRequestContext ctx, Route route)
        {
            var content = ctx.Request.Headers["Content-Type"];

            // String.Equals is false for a null or empty header, so this single check also
            // rejects requests without a Content-Type.
            bool isCsv = String.Equals(content, "text/csv", StringComparison.OrdinalIgnoreCase);
            if (!isCsv)
            {
                return ArribaResponse.BadRequest("Content-Type of {0} was not expected", content);
            }

            if (!ctx.Request.HasBody)
            {
                return ArribaResponse.BadRequest("Empty request body");
            }

            var tableName   = GetAndValidateTableName(route);
            var targetTable = this.Database[tableName];
            if (targetTable == null)
            {
                return ArribaResponse.BadRequest("Table {0} is not loaded or does not exist", tableName);
            }

            var response = new ImportResponse
            {
                TableName = tableName
            };

            var readerSettings = new CsvReaderSettings
            {
                DisposeStream = true,
                HasHeaders    = true
            };

            // Extra data attached to the monitoring event.
            var detail = new
            {
                RequestSize = ctx.Request.Headers["Content-Length"]
            };

            using (ctx.Monitor(MonitorEventLevel.Information, "Import.Csv", type: "Table", identity: tableName, detail: detail))
            using (CsvReader reader = new CsvReader(ctx.Request.InputStream, readerSettings))
            {
                response.Columns = reader.ColumnNames;

                foreach (var batch in reader.ReadAsDataBlockBatch(BatchSize))
                {
                    response.RowCount += batch.RowCount;
                    targetTable.AddOrUpdate(batch);
                }
            }

            return ArribaResponse.Created(response);
        }
Ejemplo n.º 10
0
        // Reads each enabled sample file with the non-generic CsvReader, counting records
        // and cells, and prints those counts with the elapsed seconds and a throughput
        // figure derived from the file size.
        static void TestSpecificReader()
        {
            var sampleFiles = new String[] {
                //@"../TestData.Csv/csv-bigdata.csv",
                @"../TestData.Csv/csv-comma-delimited.csv",
                //@"../TestData.Csv/csv-lumentest2.csv",
                //@"../TestData.Csv/csv-lumentest3.csv",
                @"../TestData.Csv/csv-macintosh.csv",
                @"../TestData.Csv/csv-ms-dos.csv",
                @"../TestData.Csv/csv-ms-dos-complex.csv",
                //@"../TestData.Csv/csv-students.csv",
                //@"../TestData.Csv/longrowdata.csv",
            };

            var readerSettings = new CsvReaderSettings
            {
                IgnoreErrors   = true,
                Encoding       = System.Text.Encoding.UTF8,
                SkipEmptyLines = true
            };

            foreach (var sampleFile in sampleFiles)
            {
                var csvFilePath = ProcessCsvFile(sampleFile);

                Int32 records = 0;
                Int32 cells   = 0;
                var fileSize  = new FileInfo(csvFilePath).Length;

                var startTime = DateTime.Now;
                using (var reader = CsvReader.Create(csvFilePath, readerSettings))
                {
                    foreach (var data in reader)
                    {
                        records++;
                        cells += data.Count;
                    }
                }
                var ellapsed = (DateTime.Now - startTime).TotalSeconds;

                // Substitute a tiny non-zero duration so the throughput division below
                // never divides by zero.
                if (ellapsed == 0)
                {
                    ellapsed = 0.000003;
                }

                var speed = (fileSize / MB / ellapsed).ToString("0.00");
                Console.WriteLine("RC: {0,-9}CC: {1,-10}T(s): {2,-11}S:{3}M/s",
                                  records, cells, ellapsed, speed);
                Console.WriteLine("======================");
            }

            Console.WriteLine();
        }
Ejemplo n.º 11
0
        // Reads CSV content from the readable in inputs[1], turns it into numeric data via
        // Transform, and converts that data to the element type selected by the
        // ConvertTypeCode in inputs[0]. The converted value is returned as the single
        // output, wrapped in an already-completed task. 'cancel' is not used.
        protected override Task <object[]> EvaluateInternal(object[] inputs, System.Threading.CancellationToken cancel)
        {
            var input      = (IReadable)inputs[1];
            var targetType = (ConvertTypeCode)inputs[0];

            // The looked-up TypeCode is not used below. The lookup is kept because the
            // original performed it before reading the input.
            // NOTE(review): presumably it rejects codes missing from typeCodeLookUp — confirm.
            TypeCode conversionType = typeCodeLookUp[targetType];

            var csvSettings = new CsvReaderSettings
            {
                Delimiters     = new char[] { (char)this.Properties.GetValue("Delimiter") },
                SkipEmptyLines = false,
                SkipFirstLine  = false,
                MaxTokenLength = 1024 * 1024,
                StartOfComment = ""
            };
            var v = Transform(input, csvSettings);

            Type elementType;
            switch (targetType)
            {
            case ConvertTypeCode.Boolean:
                elementType = typeof(bool);
                break;

            case ConvertTypeCode.Byte:
                elementType = typeof(byte);
                break;

            case ConvertTypeCode.Int16:
                elementType = typeof(Int16);
                break;

            case ConvertTypeCode.Int32:
                elementType = typeof(Int32);
                break;

            case ConvertTypeCode.Int64:
                // Fixed: this case used to convert to decimal instead of Int64.
                elementType = typeof(Int64);
                break;

            case ConvertTypeCode.Float32:
                elementType = typeof(Single);
                break;

            case ConvertTypeCode.Float64:
                elementType = typeof(double);
                break;

            default:
                // ConvertTypeCode.Any and any unlisted code fall back to object.
                elementType = typeof(object);
                break;
            }

            return Task.FromResult(new object[] { v.Convert(elementType) });
        }
        // Builds a sample from a CSV fragment sent in the request body: reads the first
        // batch into a DataBlock, loads up to 100 of its rows into a throw-away "Sample"
        // table, and returns the resulting column details plus a row count. Responds with
        // a bad-request response when the body is missing or no batch could be read.
        private IResponse CsvSample(IRequestContext ctx, Route route)
        {
            if (!ctx.Request.HasBody)
            {
                return(ArribaResponse.BadRequest("Empty request body"));
            }

            SampleResult result = new SampleResult();

            var config = new CsvReaderSettings()
            {
                DisposeStream = true, HasHeaders = true
            };

            using (CsvReader reader = new CsvReader(ctx.Request.InputStream, config))
            {
                // Read the CSV fragment into a DataBlock (only the first batch of up to 10000 rows is used)
                // NOTE(review): the meaning of the second argument (true) is not visible here — confirm against ReadAsDataBlockBatch.
                DataBlock block = reader.ReadAsDataBlockBatch(10000, true).FirstOrDefault();

                if (block == null)
                {
                    return(ArribaResponse.BadRequest("No result content found."));
                }

                // Count the rows actually returned
                // NOTE(review): the reported count is block.RowCount + 1; the reason for the +1 is not visible in this method — confirm.
                result.RowCount = block.RowCount + 1;

                // Insert only the first 100 rows and not the last (partial) row
                // NOTE(review): if block.RowCount is 0 this passes -1 to SetRowCount — verify that is handled.
                block.SetRowCount(Math.Min(block.RowCount - 1, 100));

                // Build a table with the sample
                Table sample = new Table("Sample", 100);
                sample.AddOrUpdate(block, new AddOrUpdateOptions()
                {
                    AddMissingColumns = true
                });

                // Return the created columns in the order they appeared in the CSV
                result.Columns = sample.ColumnDetails.OrderBy((cd) => block.IndexOfColumn(cd.Name)).ToList();

                // Return the columns and row count from the sample
                return(ArribaResponse.Ok(result));
            }
        }
        // Handles a CSV import request: requires a request body, resolves the target table
        // from the route, then streams the body into the table batch by batch, adding any
        // columns the table does not have yet. Responds with the column names and the total
        // number of rows read.
        private IResponse CsvAppend(IRequestContext ctx, Route route)
        {
            if (!ctx.Request.HasBody)
            {
                return ArribaResponse.BadRequest("Empty request body");
            }

            var tableName   = GetAndValidateTableName(route);
            var targetTable = this.Database[tableName];
            if (targetTable == null)
            {
                return ArribaResponse.BadRequest("Table {0} is not loaded or does not exist", tableName);
            }

            var response = new ImportResponse
            {
                TableName = tableName
            };

            var readerSettings = new CsvReaderSettings
            {
                DisposeStream = true,
                HasHeaders    = true
            };

            using (ctx.Monitor(MonitorEventLevel.Information, "Import.Csv", type: "Table", identity: tableName))
            using (CsvReader reader = new CsvReader(ctx.Request.InputStream, readerSettings))
            {
                response.Columns = reader.ColumnNames;

                foreach (var batch in reader.ReadAsDataBlockBatch(BatchSize))
                {
                    response.RowCount += batch.RowCount;

                    // A fresh options object is created for every batch.
                    var options = new AddOrUpdateOptions
                    {
                        AddMissingColumns = true
                    };
                    targetTable.AddOrUpdate(batch, options);
                }
            }

            return ArribaResponse.Ok(response);
        }
Ejemplo n.º 14
0
        // Reads C:\tmp\test-read.csv with a comma separator and byte-order-mark encoding
        // detection, and prints every record as ["cell1","cell2",...].
        private static void TestReadFromFile()
        {
            string path = @"C:\tmp\test-read.csv";

            var settings = new CsvReaderSettings
            {
                ColumnSeparator = ',',
                DetectEncodingFromByteOrderMarks = true
            };

            using (var stream = File.OpenRead(path))
            using (CsvReader reader = new CsvReader(stream, settings))
            {
                string[] line;
                while ((line = reader.Read()) != null)
                {
                    // Quote each cell, join with commas, then wrap the record in brackets.
                    var quotedCells = line.Select(c => "\"" + c + "\"");
                    Console.WriteLine("[" + string.Join(",", quotedCells) + "]");
                }
            }
        }
Ejemplo n.º 15
0
        // Reads the embedded test-read.csv resource, serializes the parsed lines as
        // indented JSON, and compares that with the embedded expected JSON document
        // (re-formatted with the same indentation).
        public void TestRead()
        {
            var settings = new CsvReaderSettings()
            {
                ColumnSeparator   = ',',
                Encoding          = Encoding.UTF8,
                NullValue         = "NULL",
                TextQualification = CsvTextQualification.AsNeeded,
                TextQualifier     = '"'
            };

            // Dispose the reader (and the resource stream it wraps) once the expected text is loaded.
            string rawExpectedResult;
            using (var expectedReader = new StreamReader(
                       Assembly.GetExecutingAssembly().GetManifestResourceStream("Uncomplicated.Csv.UnitTest.test-read-with-qualifiers.json"),
                       true))
            {
                rawExpectedResult = expectedReader.ReadToEnd();
            }

            var jsonExpectedResult = JToken.Parse(rawExpectedResult);

            var csvStream = Assembly.GetExecutingAssembly().GetManifestResourceStream("Uncomplicated.Csv.UnitTest.test-read.csv");

            var lines = new List <string[]>();

            using (var reader = new CsvReader(csvStream, settings))
            {
                string[] line = null;
                while ((line = reader.Read()) != null)
                {
                    lines.Add(line);
                }
            }

            string result         = JsonConvert.SerializeObject(lines, Formatting.Indented);
            string expectedResult = jsonExpectedResult.ToString(Formatting.Indented);

            // Echo both texts so a failing run can be diagnosed from the test output.
            Console.WriteLine("Expects:");
            Console.WriteLine(expectedResult);
            Console.WriteLine();
            Console.WriteLine("Result:");
            Console.WriteLine(result);

            // Expected value goes first so failure messages label the two sides correctly.
            Assert.AreEqual(expectedResult, result);
        }
Ejemplo n.º 16
0
        // Parses 'text' as CSV and builds a vector of doubles from its first record.
        // The vector has one element per cell of that record; cells that do not parse as a
        // number under the invariant culture become 0. When the text yields no records at
        // all, the vector is generated with length 0.
        private static V <double> Transform(string text, CsvReaderSettings csvReaderSettings)
        {
            using (var sr = new StringReader(text))
            {
                var csvReader = new CsvReader(csvReaderSettings);
                var splitted  = csvReader.Read(sr).ToArray();

                // Guard against empty input: indexing splitted[0] without this check threw
                // IndexOutOfRangeException when nothing was parsed.
                var rows = splitted.Length > 0 ? splitted[0].Length : 0;

                var v = V <double> .Generate <double>((row) =>
                {
                    double value;
                    // 'row < rows' short-circuits, so splitted[0] is never touched when rows is 0.
                    if (row < rows && double.TryParse(splitted[0][row], NumberStyles.Any, CultureInfo.InvariantCulture, out value))
                    {
                        return(value);
                    }
                    return(0);
                }, rows);

                return(v);
            }
        }
Ejemplo n.º 17
0
        // Checks that a line break inside a quoted field is kept as part of the field value
        // instead of starting a new row.
        public void ParseQuotedNewLine()
        {
            // Raw CSV text: "A<CR><LF>A","B","C" — the first field spans two physical lines.
            string content = "\"A\r\nA\",\"B\",\"C\"";

            var settings = new CsvReaderSettings();
            settings.HasHeaders = false;

            using (var reader = GetTestReader(content, settings))
            {
                Assert.AreEqual(3, reader.ColumnCount);

                var rows = reader.Rows.ToArray();

                string[] expectedFirstRow = { "A\r\nA", "B", "C" };
                for (int c = 0; c < expectedFirstRow.Length; c++)
                {
                    Assert.AreEqual(expectedFirstRow[c], rows[0][c].ToString());
                }
            }
        }
Ejemplo n.º 18
0
        // Parses a small in-memory CSV sample (encoded with Encoding.Default) using a comma
        // separator and prints every record as ["cell1","cell2",...].
        private static void TestReadFromText()
        {
            string txt = @"""a"",""b"",""c""
"""",""d"",""""
"""","""",""e""
""f"","""",""""";

            var settings = new CsvReaderSettings
            {
                ColumnSeparator = ','
            };

            byte[] payload = Encoding.Default.GetBytes(txt);

            using (var ms = new MemoryStream(payload))
            using (CsvReader reader = new CsvReader(ms, settings))
            {
                string[] line;
                while ((line = reader.Read()) != null)
                {
                    // Quote each cell, join with commas, then wrap the record in brackets.
                    var quotedCells = line.Select(c => "\"" + c + "\"");
                    Console.WriteLine("[" + string.Join(",", quotedCells) + "]");
                }
            }
        }
Ejemplo n.º 19
0
        // Reads the semicolon-separated UTF-8 file at 'path' and prints each record as a
        // "Row N:" heading (N starting at 1) followed by its cells separated by tabs.
        static void Read(string path)
        {
            var settings = new CsvReaderSettings
            {
                ColumnSeparator   = ';',
                Encoding          = Encoding.UTF8,
                TextQualification = CsvTextQualification.AsNeeded,
                TextQualifier     = '"'
            };

            using (Stream stream = File.OpenRead(path))
            using (CsvReader reader = new CsvReader(stream, settings))
            {
                string[] cells;
                for (int rowNumber = 1; (cells = reader.Read()) != null; rowNumber++)
                {
                    Console.WriteLine("Row " + rowNumber + ":");
                    Console.WriteLine(string.Join("\t", cells));
                }
            }
        }
Ejemplo n.º 20
0
 // Creates a CsvReader over the stream produced by StreamFromString(content),
 // using the given settings (null when the caller supplies none).
 private static CsvReader GetTestReader(string content, CsvReaderSettings settings = null)
 {
     var source = StreamFromString(content);
     return new CsvReader(source, settings);
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Initializes a new instance of the <see cref="CsvCachedReader"/> class with an
 /// empty record cache and the current record index set to -1.
 /// </summary>
 /// <param name="r">The text reader passed on to the base reader.</param>
 /// <param name="settings">The reader settings passed on to the base reader.</param>
 /// <param name="bufferSize">The buffer size passed on to the base reader.</param>
 public CsvCachedReader(TextReader r, CsvReaderSettings settings, int bufferSize)
     : base(r, settings, bufferSize)
 {
     // -1 is the initial index value while the cache is still empty.
     _currentRecordIndex = -1;
     _records = new List<string[]>();
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Initializes a new instance of the <see cref="CsvCachedReader"/> class,
 /// delegating to the three-argument constructor with <c>DefaultBufferSize</c>
 /// as the buffer size.
 /// </summary>
 /// <param name="r">The text reader to forward to the three-argument constructor.</param>
 /// <param name="settings">The reader settings to forward to the three-argument constructor.</param>
 public CsvCachedReader(TextReader r, CsvReaderSettings settings)
     : this(r, settings, DefaultBufferSize) { }