// Round-trip check: the writer is configured with IsFirstRecordSchema = false,
// the schema is emitted through an explicit WriteSchema() call, and a reader
// configured with IsFirstRecordSchema = true must then recover the same column
// count and first column name, followed by exactly one data record.
public void ShouldWriteSchemaIfExplicit()
{
    var expectedSchema = new SeparatedValueSchema();
    expectedSchema.AddColumn(new StringColumn("Col1"));

    // Explicitly indicate that the first record is NOT the schema.
    var writerOptions = new SeparatedValueOptions { IsFirstRecordSchema = false };
    var output = new StringWriter();
    var writer = new SeparatedValueWriter(output, expectedSchema, writerOptions);

    // Explicitly write the schema, then a single data record.
    writer.WriteSchema();
    writer.Write(new string[] { "a" });

    var readerOptions = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var reader = new SeparatedValueReader(new StringReader(output.ToString()), readerOptions);
    var actualSchema = reader.GetSchema();

    Assert.Equal(expectedSchema.ColumnDefinitions.Count, actualSchema.ColumnDefinitions.Count);
    Assert.Equal(expectedSchema.ColumnDefinitions[0].ColumnName, actualSchema.ColumnDefinitions[0].ColumnName);
    Assert.True(reader.Read(), "The record was not retrieved after the schema.");
    Assert.False(reader.Read(), "Encountered more than the expected number of records.");
}
// Builds a stream-based reader over "a,b,c" with IsFirstRecordSchema = false
// and no explicit schema, then requests the schema.
// NOTE(review): there is no assertion in the body; presumably an
// expected-exception attribute outside this block makes the throw from
// GetSchema() the pass condition - confirm.
public void TestGetSchema_NotExtracted_Throws()
{
    byte[] encoded = Encoding.Default.GetBytes("a,b,c");
    var stream = new MemoryStream(encoded);
    var options = new SeparatedValueOptions { IsFirstRecordSchema = false };
    IReader reader = new SeparatedValueReader(stream, options);

    reader.GetSchema();
}
// With IsFirstRecordSchema = false and no schema passed to the reader,
// GetSchema() is expected to throw InvalidOperationException.
public void TestGetSchema_NotExtracted_Throws()
{
    var options = new SeparatedValueOptions { IsFirstRecordSchema = false };
    var input = new StringReader("a,b,c");
    IReader reader = new SeparatedValueReader(input, options);

    Assert.Throws<InvalidOperationException>(() => reader.GetSchema());
}
// With IsFirstRecordSchema = true, the schema obtained from the reader must
// consist solely of StringColumn definitions named "a", "b" and "c", in order.
public void TestGetSchema_Extracted_ReturnsColumnNames()
{
    var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var input = new StringReader("a,b,c");
    IReader reader = new SeparatedValueReader(input, options);

    ISchema extracted = reader.GetSchema();

    bool allStrings = extracted.ColumnDefinitions.All(column => column is StringColumn);
    Assert.True(allStrings, "Not all of the columns were treated as strings.");

    string[] expectedNames = new string[] { "a", "b", "c" };
    string[] actualNames = extracted.ColumnDefinitions.Select(column => column.ColumnName).ToArray();
    Assert.Equal(expectedNames, actualNames);
}
// The first record supplies a three-column schema; the data record carries a
// fourth, trailing value. After reading the data record, the number of values
// returned must equal the number of schema columns.
public void TestGetSchema_FirstRecordSchema_TooManyColumns_IgnoresTrailing()
{
    // The schema record and the data record must be two separate records. The
    // break is spelled as an explicit "\n" so it cannot be lost when the file's
    // whitespace is reformatted (a literal with both rows on one line leaves
    // no data record for Read() to return).
    const string text = "id,name,created\n123,Bob,1/19/2013,Hello";
    StringReader stringReader = new StringReader(text);
    SeparatedValueOptions options = new SeparatedValueOptions() { IsFirstRecordSchema = true };
    SeparatedValueReader parser = new SeparatedValueReader(stringReader, options);

    Assert.IsTrue(parser.Read(), "The record could not be read.");
    Assert.AreEqual(parser.GetSchema().ColumnDefinitions.Count, parser.GetValues().Length);
}
// Stream-based variant: with IsFirstRecordSchema = true, the schema obtained
// from the reader must consist solely of StringColumn definitions named
// "a", "b" and "c", in order.
public void TestGetSchema_Extracted_ReturnsColumnNames()
{
    var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var stream = new MemoryStream(Encoding.Default.GetBytes("a,b,c"));
    IReader reader = new SeparatedValueReader(stream, options);

    ISchema extracted = reader.GetSchema();

    bool allStrings = extracted.ColumnDefinitions.All(column => column is StringColumn);
    Assert.IsTrue(allStrings, "Not all of the columns were treated as strings.");

    string[] expectedNames = new string[] { "a", "b", "c" };
    string[] actualNames = extracted.ColumnDefinitions.Select(column => column.ColumnName).ToArray();
    CollectionAssert.AreEqual(expectedNames, actualNames, "The schema was not extracted as expected.");
}
// Stream-based variant: when a schema is passed to the reader AND
// IsFirstRecordSchema = true, GetSchema() must return the very instance that
// was passed in, and the only record in the input (the schema record) must be
// skipped, so Read() returns false.
public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
{
    const string schemaRecordOnly = "id,name,created";

    var providedSchema = new SeparatedValueSchema();
    providedSchema.AddColumn(new Int32Column("id"))
                  .AddColumn(new StringColumn("name"))
                  .AddColumn(new DateTimeColumn("created"));

    var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var stream = new MemoryStream(Encoding.Default.GetBytes(schemaRecordOnly));
    IReader reader = new SeparatedValueReader(stream, providedSchema, options);

    ISchema returnedSchema = reader.GetSchema();
    Assert.AreSame(providedSchema, returnedSchema, "The schema was passed did not take priority.");

    bool hasRecord = reader.Read();
    Assert.IsFalse(hasRecord, "The schema record was not skipped.");
}
// When a schema is passed to the reader AND IsFirstRecordSchema = true,
// GetSchema() must return the very instance that was passed in, and the only
// record in the input (the schema record) must be skipped, so Read() returns
// false.
public void TestGetSchema_SchemaProvided_FirstRecordSchema_SkipsFirstRecord()
{
    const string schemaRecordOnly = "id,name,created";

    var providedSchema = new SeparatedValueSchema();
    providedSchema.AddColumn(new Int32Column("id"))
                  .AddColumn(new StringColumn("name"))
                  .AddColumn(new DateTimeColumn("created"));

    var options = new SeparatedValueOptions { IsFirstRecordSchema = true };
    var input = new StringReader(schemaRecordOnly);
    IReader reader = new SeparatedValueReader(input, providedSchema, options);

    ISchema returnedSchema = reader.GetSchema();
    Assert.Same(providedSchema, returnedSchema);

    bool hasRecord = reader.Read();
    Assert.False(hasRecord, "The schema record was not skipped.");
}
// Validates the delimited file at FilePath, one line at a time, against the
// separated-value schema built from RawSchema.
//
// If RawSchema is null it is first parsed from the text of the file at
// SchemaFilePath. Each input line is matched to a record schema by its
// record-identifier prefix and parsed with that schema. An error entry is
// recorded for a line when:
//   * no record schema's identifier prefixes the line,
//   * the line does not contain the configured separator, or
//   * parsing the line throws.
//
// Returns a ResultDefinition carrying the recorded error entries, the number
// of lines processed, and the FileFormat taken from RawSchema.
public Core.ResultDefinition ValidateFile()
{
    var validationResults = new List<ValidationResult>();

    if (this.RawSchema == null)
    {
        var schemaData = string.Empty;
        try
        {
            using (var sr = new StreamReader(File.OpenRead(this.SchemaFilePath)))
            {
                schemaData = sr.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Best effort, as before: the failure is only reported on the
            // console and an empty schema text is handed to the parser below.
            var errorMsg = String.Format(null, SharedResources.SchemaError, ex.Message);
            Console.WriteLine(errorMsg);
        }

        this.RawSchema = new SchemaParser().ParseSchema(schemaData);
    }

    var separatedFileSchema = new SeparatedSchemaBuilder().BuildSchema(this.RawSchema);
    var schemas = separatedFileSchema.SeparatedRecordSchemas;
    var options = new SeparatedValueOptions
    {
        Separator = separatedFileSchema.Delimeter,
        PreserveWhiteSpace = true
    };

    var count = 0;
    foreach (var inputLine in File.ReadLines(this.FilePath))
    {
        // Logged before the increment, so this number is zero-based while the
        // RowNumber stored on results is one-based.
        Console.WriteLine($"Started parsing line...{count}");
        if (inputLine == null)
        {
            continue;
        }

        // Record identifiers are machine data, so compare ordinally rather
        // than with the current culture's linguistic rules.
        var schema = schemas.FirstOrDefault(x => inputLine.StartsWith(x.RecordIdentifier, StringComparison.Ordinal));
        count++;

        if (schema == null)
        {
            validationResults.Add(CreateRowError(
                count,
                inputLine,
                "Exception: The row does not contain a record identifier that match the schema."));
            continue;
        }

        // Cheap pre-check, done before any reader is created for the line.
        if (!inputLine.Contains(options.Separator))
        {
            validationResults.Add(CreateRowError(
                count,
                inputLine,
                $"Column Separator Error: Record could not be parsed as separator {options.Separator} defined in schema is not found."));
            continue;
        }

        using (TextReader stringReader = new StringReader(inputLine))
        {
            var parser = new SeparatedValueReader(stringReader, schema.SeparatedValueSchema, options);
            try
            {
                SeparatedValueSchema actualSchema = parser.GetSchema();
                parser.Read();
                var values = parser.GetValues();
                for (int i = 0; i < values.Length; i++)
                {
                    // Null-safe: a null entry must fall back to an empty string
                    // instead of throwing NullReferenceException, which would
                    // wrongly flag the whole row as failed.
                    var valueText = values[i]?.ToString() ?? string.Empty;

                    // NOTE(review): this per-column result is built but never
                    // added to any collection in this method - confirm whether
                    // a validation step that consumes it is missing here.
                    var validationResult = new ValidationResult
                    {
                        Record = inputLine,
                        ColumnName = actualSchema.ColumnDefinitions[i].ColumnName,
                        ColumnType = actualSchema.ColumnDefinitions[i].ColumnType.Name,
                        ActualRowLength = inputLine.Length,
                        MaxColumns = actualSchema.ColumnDefinitions.HandledCount,
                        ParsedValueLength = valueText.Length,
                        RawValueLength = valueText.Length,
                        ParsedValue = valueText,
                        RawValue = valueText,
                        RowNumber = count,
                        ParsedValuesCount = values.Length
                    };
                }
            }
            catch (Exception ex)
            {
                // Prefer the inner exception's message when one is available.
                var errorMsg = ex.InnerException?.Message ?? ex.Message;
                Console.WriteLine(errorMsg);
                validationResults.Add(CreateRowError(count, inputLine, $"Exception: {errorMsg}"));
            }
        }
    }

    return new Core.ResultDefinition
    {
        Results = validationResults,
        TotalLinesProcessed = count,
        FileFormat = this.RawSchema.FileFormat
    };
}

// Builds the error entry recorded for a row that could not be validated.
private static ValidationResult CreateRowError(int rowNumber, string record, string message)
{
    return new ValidationResult
    {
        RowNumber = rowNumber,
        Record = record,
        ErrorMessages = new List<string> { message },
        HasErrors = true
    };
}
// The first record supplies a three-column schema; the data record carries a
// fourth, trailing value. After reading the data record, the number of values
// returned must equal the number of schema columns.
public void TestGetSchema_FirstRecordSchema_TooManyColumns_IgnoresTrailing()
{
    // The schema record and the data record must be two separate records. The
    // break is spelled as an explicit "\n" so it cannot be lost when the file's
    // whitespace is reformatted (a literal with both rows on one line leaves
    // no data record for Read() to return).
    const string text = "id,name,created\n123,Bob,1/19/2013,Hello";
    StringReader stringReader = new StringReader(text);
    SeparatedValueOptions options = new SeparatedValueOptions() { IsFirstRecordSchema = true };
    SeparatedValueReader parser = new SeparatedValueReader(stringReader, options);

    Assert.True(parser.Read(), "The record could not be read.");
    Assert.Equal(parser.GetSchema().ColumnDefinitions.Count, parser.GetValues().Length);
}
// With IsFirstRecordSchema = false and no schema passed to the reader,
// GetSchema() is expected to throw InvalidOperationException.
public void TestGetSchema_NotExtracted_Throws()
{
    var options = new SeparatedValueOptions { IsFirstRecordSchema = false };
    var input = new StringReader("a,b,c");
    IReader reader = new SeparatedValueReader(input, options);

    Assert.Throws<InvalidOperationException>(() => reader.GetSchema());
}