/// <summary>
/// Writes the schema by hand through a writer whose options have IsFirstRecordSchema = false,
/// then reads the text back with IsFirstRecordSchema = true and checks that the header row and
/// exactly one data record are found.
/// </summary>
public void ShouldWriteSchemaIfExplicit()
        {
            // Arrange: a single string column, and a writer that will not emit the schema on its own.
            var expectedSchema = new SeparatedValueSchema();
            expectedSchema.AddColumn(new StringColumn("Col1"));

            var output        = new StringWriter();
            var writerOptions = new SeparatedValueOptions { IsFirstRecordSchema = false };
            var writer        = new SeparatedValueWriter(output, expectedSchema, writerOptions);

            // Act: emit the schema row explicitly, followed by one record.
            writer.WriteSchema();
            writer.Write(new string[] { "a" });

            // Read the produced text back, this time treating the first record as the schema.
            var readerOptions = new SeparatedValueOptions { IsFirstRecordSchema = true };
            var reader        = new SeparatedValueReader(new StringReader(output.ToString()), readerOptions);
            var actualSchema  = reader.GetSchema();

            // Assert: the parsed schema has the same column count and first column name.
            Assert.Equal(expectedSchema.ColumnDefinitions.Count, actualSchema.ColumnDefinitions.Count);
            Assert.Equal(expectedSchema.ColumnDefinitions[0].ColumnName, actualSchema.ColumnDefinitions[0].ColumnName);

            // Exactly one record must follow the schema row.
            Assert.True(reader.Read(), "The record was not retrieved after the schema.");
            Assert.False(reader.Read(), "Encountered more than the expected number of records.");
        }
        /// <summary>
        /// Calls GetSchema on a stream-backed reader that was given no schema and whose options
        /// have IsFirstRecordSchema = false.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name implies an exception is expected, but the body asserts nothing;
        /// presumably an expected-exception attribute is applied to this method - confirm.
        /// </remarks>
        public void TestGetSchema_NotExtracted_Throws()
        {
            byte[] payload = Encoding.Default.GetBytes("a,b,c");
            var    input   = new MemoryStream(payload);
            var    options = new SeparatedValueOptions { IsFirstRecordSchema = false };

            IReader parser = new SeparatedValueReader(input, options);

            parser.GetSchema();
        }
        /// <summary>
        /// Verifies that GetSchema throws <see cref="InvalidOperationException"/> when the reader was
        /// given no schema and the options have IsFirstRecordSchema = false.
        /// </summary>
        public void TestGetSchema_NotExtracted_Throws()
        {
            var options = new SeparatedValueOptions { IsFirstRecordSchema = false };
            var source  = new StringReader("a,b,c");

            IReader parser = new SeparatedValueReader(source, options);

            Assert.Throws<InvalidOperationException>(() => parser.GetSchema());
        }
 /// <summary>
 /// Verifies that, with IsFirstRecordSchema = true, the schema taken from the line "a,b,c"
 /// consists solely of string columns named a, b and c.
 /// </summary>
 public void TestGetSchema_Extracted_ReturnsColumnNames()
 {
     var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
     var source = new StringReader("a,b,c");
     IReader parser = new SeparatedValueReader(source, options);

     ISchema schema = parser.GetSchema();

     // Every column taken from the header row must be a StringColumn.
     bool allStrings = schema.ColumnDefinitions.All(column => column is StringColumn);
     Assert.True(allStrings, "Not all of the columns were treated as strings.");

     // The column names must match the header fields, in order.
     string[] expected = { "a", "b", "c" };
     string[] actual = schema.ColumnDefinitions.Select(column => column.ColumnName).ToArray();
     Assert.Equal(expected, actual);
 }
        /// <summary>
        /// Verifies that when the header row declares three columns and the data row carries four
        /// values, the record is still read and the number of returned values equals the number of
        /// schema columns.
        /// </summary>
        public void TestGetSchema_FirstRecordSchema_TooManyColumns_IgnoresTrailing()
        {
            // The second line has one more value than the header has columns.
            const string input = @"id,name,created
123,Bob,1/19/2013,Hello";
            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
            var parser  = new SeparatedValueReader(new StringReader(input), options);

            bool wasRead = parser.Read();
            Assert.IsTrue(wasRead, "The record could not be read.");

            var columnCount = parser.GetSchema().ColumnDefinitions.Count;
            var valueCount  = parser.GetValues().Length;
            Assert.AreEqual(columnCount, valueCount);
        }
        /// <summary>
        /// Verifies that a stream-backed reader with IsFirstRecordSchema = true extracts a schema of
        /// string columns named a, b and c from the line "a,b,c".
        /// </summary>
        public void TestGetSchema_Extracted_ReturnsColumnNames()
        {
            var input   = new MemoryStream(Encoding.Default.GetBytes("a,b,c"));
            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
            IReader parser = new SeparatedValueReader(input, options);

            ISchema schema = parser.GetSchema();

            // Every extracted column must be a StringColumn.
            bool allStrings = schema.ColumnDefinitions.All(column => column is StringColumn);
            Assert.IsTrue(allStrings, "Not all of the columns were treated as strings.");

            // Column names must match the header fields, in order.
            string[] expected = { "a", "b", "c" };
            string[] actual   = schema.ColumnDefinitions.Select(column => column.ColumnName).ToArray();
            CollectionAssert.AreEqual(expected, actual, "The schema was not extracted as expected.");
        }
        /// <summary>
        /// Verifies that a text-reader-backed reader with IsFirstRecordSchema = true extracts a
        /// schema of string columns named a, b and c from the line "a,b,c".
        /// </summary>
        public void TestGetSchema_Extracted_ReturnsColumnNames()
        {
            var source  = new StringReader("a,b,c");
            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };

            IReader parser = new SeparatedValueReader(source, options);
            ISchema schema = parser.GetSchema();

            // All header-derived columns are expected to be StringColumn instances.
            Assert.True(
                schema.ColumnDefinitions.All(definition => definition is StringColumn),
                "Not all of the columns were treated as strings.");

            // Names are compared in order against the header fields.
            string[] expected = { "a", "b", "c" };
            string[] actual   = schema.ColumnDefinitions.Select(definition => definition.ColumnName).ToArray();
            Assert.Equal(expected, actual);
        }
        /// <summary>
        /// Verifies that when a schema is passed to the reader together with IsFirstRecordSchema = true,
        /// GetSchema returns that same schema instance and the header-only input yields no records.
        /// </summary>
        public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
        {
            // The input consists of nothing but the header line.
            const string headerOnly = @"id,name,created";

            var idColumn      = new Int32Column("id");
            var nameColumn    = new StringColumn("name");
            var createdColumn = new DateTimeColumn("created");

            var schema = new SeparatedValueSchema();
            schema.AddColumn(idColumn).AddColumn(nameColumn).AddColumn(createdColumn);

            var input   = new MemoryStream(Encoding.Default.GetBytes(headerOnly));
            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };

            IReader parser = new SeparatedValueReader(input, schema, options);
            ISchema actual = parser.GetSchema();

            Assert.AreSame(schema, actual, "The schema was passed did not take priority.");

            bool hasRecord = parser.Read();
            Assert.IsFalse(hasRecord, "The schema record was not skipped.");
        }
        /// <summary>
        /// Verifies that a reader given both a schema and IsFirstRecordSchema = true returns the
        /// supplied schema instance from GetSchema and reads no records from header-only input.
        /// </summary>
        public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
        {
            // Only a header line is present in the input.
            const string headerOnly = @"id,name,created";

            var schema = new SeparatedValueSchema();
            schema.AddColumn(new Int32Column("id"))
                  .AddColumn(new StringColumn("name"))
                  .AddColumn(new DateTimeColumn("created"));

            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
            var source  = new StringReader(headerOnly);

            IReader parser = new SeparatedValueReader(source, schema, options);
            ISchema actual = parser.GetSchema();

            // The supplied schema must be handed back as the very same instance.
            Assert.Same(schema, actual);

            bool hasRecord = parser.Read();
            Assert.False(hasRecord, "The schema record was not skipped.");
        }
Beispiel #10
0
        /// <summary>
        /// Validates every line of the file at <c>FilePath</c> against the separated-value record
        /// schemas built from <c>RawSchema</c>, collecting an error entry for each line that cannot
        /// be matched to a schema, lacks the configured separator, or fails while being parsed.
        /// </summary>
        /// <remarks>
        /// When <c>RawSchema</c> is null it is first loaded from <c>SchemaFilePath</c>. A failure to
        /// read that file is only written to the console; the schema parser is then invoked with
        /// an empty string.
        /// </remarks>
        /// <returns>
        /// A result definition holding the collected validation errors, the number of lines
        /// processed and the file format taken from <c>RawSchema</c>.
        /// </returns>
        public Core.ResultDefinition ValidateFile()
        {
            var schemaData        = string.Empty;
            var schemaParser      = new SchemaParser();
            var validationResults = new List <ValidationResult>();
            // NOTE(review): this validator is never used in this method; it is kept only because
            // its constructor is not visible from here. Remove once confirmed side-effect free.
            var validator         = new Validators.FixedLengthValidator();

            if (this.RawSchema == null)
            {
                try
                {
                    using (var sr = new StreamReader(File.OpenRead(this.SchemaFilePath)))
                    {
                        schemaData = sr.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: the failure is reported on the console and parsing continues
                    // with whatever schemaData holds (empty when the read failed).
                    var errorMsg = String.Format(null, SharedResources.SchemaError, ex.Message);
                    Console.WriteLine($"{errorMsg}");
                }

                this.RawSchema = schemaParser.ParseSchema(schemaData);
            }

            var separatedFileSchemaBuilder = new SeparatedSchemaBuilder();
            var separatedFileSchema        = separatedFileSchemaBuilder.BuildSchema(this.RawSchema);
            var schemas = separatedFileSchema.SeparatedRecordSchemas;
            var count   = 0;
            var options = new SeparatedValueOptions
            {
                Separator          = separatedFileSchema.Delimeter,
                PreserveWhiteSpace = true
            };

            foreach (var inputLine in File.ReadLines(this.FilePath))
            {
                Console.WriteLine($"Started parsing line...{count}");

                TextReader stringReader = new StringReader(inputLine);

                // Pick the first record schema whose identifier prefixes the line.
                var schema = schemas.FirstOrDefault(x => inputLine.StartsWith(x.RecordIdentifier));
                count++;

                if (schema == null)
                {
                    validationResults.Add(new ValidationResult
                    {
                        RowNumber     = count,
                        Record        = inputLine,
                        ErrorMessages = new List <string>
                        {
                            $"Exception: The row does not contain a record identifier that match the schema."
                        },
                        HasErrors = true
                    });

                    continue;
                }

                var parser = new SeparatedValueReader(stringReader, schema.SeparatedValueSchema, options);

                if (!inputLine.Contains(options.Separator))
                {
                    validationResults.Add(new ValidationResult
                    {
                        Record        = inputLine,
                        ErrorMessages = new List <string>
                        {
                            $"Column Separator Error: Record could not be parsed as separator {options.Separator} defined in schema is not found.",
                        },
                        HasErrors = true,
                        RowNumber = count,
                    });

                    continue;
                }

                try
                {
                    SeparatedValueSchema actualSchema = parser.GetSchema();
                    parser.Read();
                    var values = parser.GetValues();

                    for (int i = 0; i < values.Length; i++)
                    {
                        // A parsed value may be null; the null-conditional call keeps such a
                        // value from raising a NullReferenceException that would wrongly flag
                        // the whole row as failed.
                        var valueText = values[i]?.ToString() ?? string.Empty;

                        // NOTE(review): this per-column result is built but never added to
                        // validationResults; as written, its only observable effect is any
                        // exception raised while reading the column definition (for example
                        // when the row has more values than the schema has columns).
                        var validationResult = new ValidationResult
                        {
                            Record            = inputLine,
                            ColumnName        = actualSchema.ColumnDefinitions[i].ColumnName,
                            ColumnType        = actualSchema.ColumnDefinitions[i].ColumnType.Name,
                            ActualRowLength   = inputLine.Length,
                            MaxColumns        = actualSchema.ColumnDefinitions.HandledCount,
                            ParsedValueLength = valueText.Length,
                            RawValueLength    = valueText.Length,
                            ParsedValue       = valueText,
                            RawValue          = valueText,
                            RowNumber         = count,
                            ParsedValuesCount = values.Length
                        };
                    }
                }
                catch (Exception ex)
                {
                    // Prefer the inner exception's message when one is available.
                    var errorMsg = ex.InnerException?.Message ?? ex.Message;
                    Console.WriteLine(errorMsg);

                    validationResults.Add(new ValidationResult
                    {
                        RowNumber     = count,
                        Record        = inputLine,
                        ErrorMessages = new List <string>
                        {
                            $"Exception: {errorMsg}"
                        },
                        HasErrors = true
                    });
                }
            }

            var results = new Core.ResultDefinition
            {
                Results             = validationResults,
                TotalLinesProcessed = count,
                FileFormat          = this.RawSchema.FileFormat
            };

            return(results);
        }
 /// <summary>
 /// Verifies that when the data row carries one more value than the header row has columns,
 /// the record is still read and the number of returned values equals the schema column count.
 /// </summary>
 public void TestGetSchema_FirstRecordSchema_TooManyColumns_IgnoresTrailing()
 {
     // Header with three columns, followed by a data line holding four values.
     const string input = @"id,name,created
     123,Bob,1/19/2013,Hello";
     var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
     var parser = new SeparatedValueReader(new StringReader(input), options);

     bool wasRead = parser.Read();
     Assert.True(wasRead, "The record could not be read.");

     var columnCount = parser.GetSchema().ColumnDefinitions.Count;
     var valueCount = parser.GetValues().Length;
     Assert.Equal(columnCount, valueCount);
 }
        /// <summary>
        /// Verifies that a reader constructed with an explicit schema and IsFirstRecordSchema = true
        /// hands back that same schema instance and finds no records in header-only input.
        /// </summary>
        public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
        {
            // Arrange: a three-column schema and input that contains only the header line.
            var idColumn      = new Int32Column("id");
            var nameColumn    = new StringColumn("name");
            var createdColumn = new DateTimeColumn("created");

            var schema = new SeparatedValueSchema();
            schema.AddColumn(idColumn).AddColumn(nameColumn).AddColumn(createdColumn);

            const string headerOnly = @"id,name,created";
            var source  = new StringReader(headerOnly);
            var options = new SeparatedValueOptions { IsFirstRecordSchema = true };

            // Act
            IReader parser = new SeparatedValueReader(source, schema, options);
            ISchema actual = parser.GetSchema();

            // Assert
            Assert.Same(schema, actual);
            Assert.False(parser.Read(), "The schema record was not skipped.");
        }
 /// <summary>
 /// Verifies that GetSchema throws <see cref="InvalidOperationException"/> for a reader that was
 /// given no schema and whose options have IsFirstRecordSchema = false.
 /// </summary>
 public void TestGetSchema_NotExtracted_Throws()
 {
     var source = new StringReader("a,b,c");
     var options = new SeparatedValueOptions { IsFirstRecordSchema = false };

     IReader parser = new SeparatedValueReader(source, options);

     Assert.Throws<InvalidOperationException>(() => parser.GetSchema());
 }
 /// <summary>
 /// Verifies that a stream-backed reader given an explicit schema and IsFirstRecordSchema = true
 /// returns the supplied schema instance and reads no records from header-only input.
 /// </summary>
 public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
 {
     // The input holds only the header line.
     const string headerOnly = @"id,name,created";
     var input = new MemoryStream(Encoding.Default.GetBytes(headerOnly));

     var schema = new SeparatedValueSchema();
     schema.AddColumn(new Int32Column("id"))
           .AddColumn(new StringColumn("name"))
           .AddColumn(new DateTimeColumn("created"));

     var options = new SeparatedValueOptions { IsFirstRecordSchema = true };

     IReader parser = new SeparatedValueReader(input, schema, options);
     ISchema actual = parser.GetSchema();

     Assert.AreSame(schema, actual, "The schema was passed did not take priority.");

     bool hasRecord = parser.Read();
     Assert.IsFalse(hasRecord, "The schema record was not skipped.");
 }
 /// <summary>
 /// Calls GetSchema on a stream-backed reader that was given no schema and whose options have
 /// IsFirstRecordSchema = false.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the name implies an exception is expected, yet the body contains no assertion;
 /// presumably an expected-exception attribute is applied to this method - confirm.
 /// </remarks>
 public void TestGetSchema_NotExtracted_Throws()
 {
     var input = new MemoryStream(Encoding.Default.GetBytes("a,b,c"));
     var options = new SeparatedValueOptions { IsFirstRecordSchema = false };

     IReader parser = new SeparatedValueReader(input, options);

     parser.GetSchema();
 }
 /// <summary>
 /// Verifies that a stream-backed reader with IsFirstRecordSchema = true extracts string columns
 /// named a, b and c from the line "a,b,c".
 /// </summary>
 public void TestGetSchema_Extracted_ReturnsColumnNames()
 {
     byte[] payload = Encoding.Default.GetBytes("a,b,c");
     var options = new SeparatedValueOptions { IsFirstRecordSchema = true };

     IReader parser = new SeparatedValueReader(new MemoryStream(payload), options);
     ISchema schema = parser.GetSchema();

     // Each extracted column is expected to be a StringColumn.
     Assert.IsTrue(
         schema.ColumnDefinitions.All(definition => definition is StringColumn),
         "Not all of the columns were treated as strings.");

     // Column names are compared in order against the header fields.
     string[] expected = { "a", "b", "c" };
     string[] actual = schema.ColumnDefinitions.Select(definition => definition.ColumnName).ToArray();
     CollectionAssert.AreEqual(expected, actual, "The schema was not extracted as expected.");
 }