/// <summary>
/// Inspects the requested file, builds a process for it, and runs that process.
/// </summary>
/// <param name="request">Inspection settings identifying the file to import.</param>
/// <param name="output">Output connection handed to <c>BuildProcess</c>.</param>
/// <returns>
/// The inspected file information together with the rows returned by the run.
/// <c>RowCount</c> is only assigned when the "output" connection's provider is not "internal".
/// </returns>
public FileImportResult Import(FileInspectionRequest request, TflConnection output) {
    var inspected = FileInformationFactory.Create(request, _logger);
    var configuration = BuildProcess(inspected, request, output, _logger);

    var target = configuration.Connections.First(c => c.Name == "output");
    if (target.Provider == "internal") {
        // An internal output has nothing to initialize, so one default-mode run is enough.
        var internalRows = ProcessFactory.CreateSingle(configuration, _logger).Execute();
        return new FileImportResult { Information = inspected, Rows = internalRows };
    }

    // First pass: init mode.
    configuration.Mode = "init";
    var initializer = ProcessFactory.CreateSingle(configuration, _logger, new Options { Mode = "init" });
    initializer.ExecuteScaler();

    // Second pass: default mode. Execute() is called before Inserts is read, as in the original initializer order.
    configuration.Mode = "default";
    var importer = ProcessFactory.CreateSingle(configuration, _logger, new Options() { Mode = "default" });
    var importedRows = importer.Execute();
    var inserted = importer.Entities[0].Inserts;

    return new FileImportResult { Information = inspected, Rows = importedRows, RowCount = inserted };
}
// Writes a sample containing several candidate delimiters, prints the statistics of every
// delimiter with a positive per-line average, and expects six fields named "A".."F" split on '|'.
public void TestMultipleDelimiters() {
    // NOTE(review): the temp file created here is never deleted.
    var path = Path.GetTempFileName();
    File.WriteAllText(path, @"f|1,f|2,f|3,f|4,f|5 v|1,v|;2,v|3,v|4,v|5 v|6,v|;7,v|8,v9,v|10, v|11,v|;12,v|13,v|14,v|15");

    var request = new FileInspectionRequest(path);
    var actual = FileInformationFactory.Create(request, new TestLogger());

    // Candidates that actually occur, ordered by coefficient of variance.
    var occurring = request.Delimiters
        .Select(p => p.Value)
        .Where(d => d.AveragePerLine > 0)
        .OrderBy(d => d.CoefficientOfVariance());

    foreach (var candidate in occurring) {
        Console.WriteLine(
            "Delimiter: `{0}` CoV: {1} Average: {2} StdDev: {3}",
            candidate.Character,
            candidate.CoefficientOfVariance(),
            candidate.AveragePerLine,
            candidate.StandardDeviation);
    }

    Assert.AreEqual(6, actual.Fields.Count);
    Assert.AreEqual('|', actual.Delimiter);

    var expectedNames = new[] { "A", "B", "C", "D", "E", "F" };
    for (var i = 0; i < expectedNames.Length; i++) {
        Assert.AreEqual(expectedNames[i], actual.Fields[i].Name);
    }
}
/// <summary>
/// Stores the file, request and logger, records whether the file extension is ".csv"
/// (case-insensitive), and fills the line storage from a <c>LineLoader</c>.
/// </summary>
/// <param name="fileInfo">File whose lines are loaded.</param>
/// <param name="request">Inspection settings passed on to the loader.</param>
/// <param name="logger">Logger kept for later use.</param>
public Lines(FileSystemInfo fileInfo, FileInspectionRequest request, ILogger logger) {
    _fileInfo = fileInfo;

    var extension = fileInfo.Extension;
    _isCsv = extension.Equals(".csv", StringComparison.OrdinalIgnoreCase);

    _request = request;
    _logger = logger;

    var loader = new LineLoader(fileInfo, request);
    _storage.AddRange(loader.Load());
}
// Inspects a CSV with quoted headers and quoted numeric values, asking only for "decimal"
// detection with a minimum length of 3, and checks names, types, quoting and lengths.
public void TestFieldQuotedCsv() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, @"""State"",""Population"",""Shape"" MI,""10,000,000"",Mitten CA,""20,000,000"",Sock KS,""9,000,000"",Rectangle");

    var request = new FileInspectionRequest(csvPath) {
        DataTypes = new List<string> { "decimal" },
        MinLength = 3
    };
    var information = FileInformationFactory.Create(request, new TestLogger());
    var inspector = new FieldInspector(new TestLogger());
    var actual = inspector.Inspect(information, request);

    var expectedNames = new[] { "State", "Population", "Shape" };
    var expectedTypes = new[] { "string", "decimal", "string" };
    var expectedLengths = new[] { "3", "3", "10" };

    Assert.AreEqual(3, actual.Count);

    for (var i = 0; i < expectedNames.Length; i++) {
        Assert.AreEqual(expectedNames[i], actual[i].Name);
    }
    for (var i = 0; i < expectedTypes.Length; i++) {
        Assert.AreEqual(expectedTypes[i], actual[i].Type);
    }
    for (var i = 0; i < 3; i++) {
        Assert.AreEqual('\"', actual[i].QuotedWith);
    }
    for (var i = 0; i < expectedLengths.Length; i++) {
        Assert.AreEqual(expectedLengths[i], actual[i].Length);
    }
}
/// <summary>
/// Stores the raw line and its quote character, then splits the line once per delimiter
/// in the request using the quote-aware <c>DelimiterSplit</c>.
/// </summary>
/// <param name="content">Raw text of the line.</param>
/// <param name="quote">Quote character passed to the split.</param>
/// <param name="request">Inspection settings supplying the delimiters to try.</param>
public Line(string content, char quote, FileInspectionRequest request) {
    _content = content;
    _quote = quote;

    foreach (var entry in request.Delimiters) {
        var delimiter = entry.Key;
        _values[delimiter] = content.DelimiterSplit(delimiter, quote);
    }
}
/// <summary>
/// Inspects the requested file, builds a process for it, and runs that process.
/// </summary>
/// <param name="request">Inspection settings identifying the file to import.</param>
/// <param name="output">Output connection handed to <c>BuildProcess</c>.</param>
/// <returns>
/// The inspected file information together with the rows returned by the run.
/// <c>RowCount</c> is only assigned when the "output" connection's provider is not "internal".
/// </returns>
public FileImportResult Import(FileInspectionRequest request, TflConnection output) {
    var information = FileInformationFactory.Create(request, _logger);
    var settings = BuildProcess(information, request, output, _logger);

    var destination = settings.Connections.First(c => c.Name == "output");
    if (destination.Provider == "internal") {
        // Nothing to initialize for an internal output: run once in default mode.
        var rows = ProcessFactory.CreateSingle(settings, _logger).Execute();
        return new FileImportResult { Information = information, Rows = rows };
    }

    // Init pass.
    settings.Mode = "init";
    var initPass = ProcessFactory.CreateSingle(settings, _logger, new Options { Mode = "init" });
    initPass.ExecuteScaler();

    // Default pass. Execute() runs before Inserts is read, matching the original initializer order.
    settings.Mode = "default";
    var defaultPass = ProcessFactory.CreateSingle(settings, _logger, new Options() { Mode = "default" });
    var loadedRows = defaultPass.Execute();
    var insertCount = defaultPass.Entities[0].Inserts;

    return new FileImportResult { Information = information, Rows = loadedRows, RowCount = insertCount };
}
/// <summary>
/// Stores the raw line, then splits it once per delimiter in the request with <c>Split</c>.
/// </summary>
/// <param name="content">Raw text of the line.</param>
/// <param name="request">Inspection settings supplying the delimiters to try.</param>
public Line(string content, FileInspectionRequest request) {
    _content = content;

    foreach (var entry in request.Delimiters) {
        var delimiter = entry.Key;
        _values[delimiter] = content.Split(delimiter);
    }
}
// Reads the sample workbook and expects three columns, the second named "Header 2".
public void TestExcel() {
    const string workbookPath = @"TestFiles\Headers\Headers.xlsx";

    var inspection = new FileInspectionRequest(workbookPath);
    var information = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(3, information.ColumnCount());
    Assert.AreEqual("Header 2", information.Fields[1].Name);
}
// Reads the sample CSV with a line limit of 3 and expects a comma delimiter,
// three columns, and a second column named "Header 2".
public void TestCommas() {
    const string csvPath = @"TestFiles\Headers\Headers.csv";

    var inspection = new FileInspectionRequest(csvPath) {
        LineLimit = 3
    };
    var information = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(',', information.Delimiter);
    Assert.AreEqual(3, information.ColumnCount());
    Assert.AreEqual("Header 2", information.Fields[1].Name);
}
// Reads a single-column text file and expects no delimiter (default char),
// one column named "Header 1", and a length of "1024".
public void TestSingleColumn() {
    const string textPath = @"TestFiles\Headers\Single.txt";

    var inspection = new FileInspectionRequest(textPath);
    var information = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(default(char), information.Delimiter);
    Assert.AreEqual(1, information.ColumnCount());

    var onlyField = information.Fields[0];
    Assert.AreEqual("Header 1", onlyField.Name);
    Assert.AreEqual("1024", onlyField.Length);
}
/// <summary>
/// Reads file information for the request. Files whose extension starts with ".xls"
/// (case-insensitive) go through <c>ExcelInformationReader</c>; everything else goes through
/// <c>FileInformationReader</c>. When the detected column names fail validation, the first row is
/// marked as not being a header and every field receives a generated default name.
/// </summary>
/// <param name="request">Inspection settings; <c>request.FileInfo</c> identifies the file.</param>
/// <param name="logger">Logger passed to the delimited-file reader.</param>
/// <param name="processName">Not used by this method.</param>
/// <param name="entityName">Not used by this method.</param>
/// <returns>The file information, with generated field names when the detected ones are not valid.</returns>
public static FileInformation Create(FileInspectionRequest request, ILogger logger, string processName = null, string entityName = null) {
    // OrdinalIgnoreCase already ignores case, so a culture-sensitive ToLower() beforehand is unnecessary.
    var isExcel = request.FileInfo.Extension.StartsWith(".xls", StringComparison.OrdinalIgnoreCase);

    var fileInformation = isExcel
        ? new ExcelInformationReader(request).Read(request.FileInfo)
        : new FileInformationReader(request, logger).Read(request.FileInfo);

    var validator = new ColumnNameValidator(fileInformation.Fields.Select(f => f.Name).ToArray());
    if (validator.Valid()) {
        return fileInformation;
    }

    // The first row does not look like a header: treat it as data and generate names.
    fileInformation.FirstRowIsHeader = false;
    for (var i = 0; i < fileInformation.Fields.Count; i++) {
        fileInformation.Fields[i].Name = ColumnNameGenerator.CreateDefaultColumnName(i);
    }

    return fileInformation;
}
/// <summary>
/// Builds the import process: a file "input" connection for the inspected file, the caller's
/// <paramref name="output"/> connection, and one entity whose fields come from <c>GetFields</c>
/// applied to a <c>FieldInspector</c> run.
/// </summary>
/// <param name="fileInformation">Inspected file (path, delimiter, header flag).</param>
/// <param name="request">Inspection settings; supplies the entity and process names.</param>
/// <param name="output">Connection added after the "input" connection.</param>
/// <param name="logger">Logger passed to the field inspector and to <c>GetFields</c>.</param>
/// <returns>The configured process.</returns>
private static TflProcess BuildProcess(
    FileInformation fileInformation,
    FileInspectionRequest request,
    TflConnection output,
    ILogger logger) {

    var inputPath = fileInformation.FileInfo.FullName;
    // A file with no detected delimiter is read with "|".
    var inputDelimiter = fileInformation.Delimiter == default(char)
        ? "|"
        : fileInformation.Delimiter.ToString(CultureInfo.InvariantCulture);
    // Start on the second line when the first row is a header.
    var firstDataLine = fileInformation.FirstRowIsHeader ? 2 : 1;

    return new TflRoot().GetDefaultOf<TflProcess>(proc => {
        // The process is named after the entity; the process name is used for Star.
        proc.Name = request.EntityName;
        proc.Star = request.ProcessName;
        proc.StarEnabled = false;
        proc.ViewEnabled = false;
        proc.PipelineThreading = "MultiThreaded";

        var input = proc.GetDefaultOf<TflConnection>(conn => {
            conn.Name = "input";
            conn.Provider = "file";
            conn.File = inputPath;
            conn.Delimiter = inputDelimiter;
            conn.Start = firstDataLine;
        });
        proc.Connections = new List<TflConnection> { input, output };

        var imported = proc.GetDefaultOf<TflEntity>(ent => {
            ent.Name = request.EntityName;
            ent.Connection = "input";
            ent.PrependProcessNameToOutputName = false;
            ent.DetectChanges = false;
            ent.Fields = GetFields(proc, new FieldInspector(logger).Inspect(fileInformation, request), logger, request.EntityName);
        });
        proc.Entities = new List<TflEntity> { imported };
    });
}
/// <summary>
/// Builds the import process: a file "input" connection for the inspected file, the caller's
/// <paramref name="output"/> connection, and one entity whose fields come from <c>GetFields</c>
/// applied to a <c>FieldInspector</c> run.
/// </summary>
/// <param name="fileInformation">Inspected file (path, delimiter, header flag).</param>
/// <param name="request">Inspection settings; supplies the entity and process names.</param>
/// <param name="output">Connection added after the "input" connection.</param>
/// <param name="logger">Logger passed to the field inspector and to <c>GetFields</c>.</param>
/// <returns>The configured process.</returns>
private static TflProcess BuildProcess(
    FileInformation fileInformation,
    FileInspectionRequest request,
    TflConnection output,
    ILogger logger) {

    // Input connection first, then the entity, keeping the original construction order.
    var input = new TflConnection {
        Name = "input",
        Provider = "file",
        File = fileInformation.FileInfo.FullName,
        // A file with no detected delimiter is read with "|".
        Delimiter = fileInformation.Delimiter == default(char)
            ? "|"
            : fileInformation.Delimiter.ToString(CultureInfo.InvariantCulture),
        // Start on the second line when the first row is a header.
        Start = fileInformation.FirstRowIsHeader ? 2 : 1
    }.WithDefaults();

    var imported = new TflEntity {
        Name = request.EntityName,
        Connection = "input",
        PrependProcessNameToOutputName = false,
        DetectChanges = false,
        Fields = GetFields(new FieldInspector(logger).Inspect(fileInformation, request), logger, request.EntityName)
    }.WithDefaults();

    // The process is named after the entity; the process name is used for Star.
    return new TflProcess {
        Name = request.EntityName,
        Star = request.ProcessName,
        StarEnabled = false,
        ViewEnabled = false,
        PipelineThreading = "MultiThreaded",
        Connections = new List<TflConnection> { input, output },
        Entities = new List<TflEntity> { imported }
    }.WithDefaults();
}
/// <summary>
/// Builds the import process: a file "input" connection for the inspected file, the caller's
/// <paramref name="output"/> connection, and one entity whose fields come from <c>GetFields</c>
/// applied to a <c>FieldInspector</c> run.
/// </summary>
/// <param name="fileInformation">Inspected file (path, delimiter, header flag).</param>
/// <param name="request">Inspection settings; supplies the entity and process names.</param>
/// <param name="output">Connection added after the "input" connection.</param>
/// <param name="logger">Logger passed to the field inspector and to <c>GetFields</c>.</param>
/// <returns>The configured process.</returns>
private static TflProcess BuildProcess(
    FileInformation fileInformation,
    FileInspectionRequest request,
    TflConnection output,
    ILogger logger) {

    var sourceFile = fileInformation.FileInfo.FullName;
    // A file with no detected delimiter is read with "|".
    var separator = fileInformation.Delimiter == default(char)
        ? "|"
        : fileInformation.Delimiter.ToString(CultureInfo.InvariantCulture);
    // Start on the second line when the first row is a header.
    var startLine = fileInformation.FirstRowIsHeader ? 2 : 1;

    return new TflRoot().GetDefaultOf<TflProcess>(proc => {
        // The process is named after the entity; the process name is used for Star.
        proc.Name = request.EntityName;
        proc.Star = request.ProcessName;
        proc.StarEnabled = false;
        proc.ViewEnabled = false;
        proc.PipelineThreading = "MultiThreaded";

        var fileConnection = proc.GetDefaultOf<TflConnection>(conn => {
            conn.Name = "input";
            conn.Provider = "file";
            conn.File = sourceFile;
            conn.Delimiter = separator;
            conn.Start = startLine;
        });
        proc.Connections = new List<TflConnection> { fileConnection, output };

        var fileEntity = proc.GetDefaultOf<TflEntity>(ent => {
            ent.Name = request.EntityName;
            ent.Connection = "input";
            ent.PrependProcessNameToOutputName = false;
            ent.DetectChanges = false;
            ent.Fields = GetFields(proc, new FieldInspector(logger).Inspect(fileInformation, request), logger, request.EntityName);
        });
        proc.Entities = new List<TflEntity> { fileEntity };
    });
}
/// <summary>
/// Reads file information for the request. Files whose extension starts with ".xls"
/// (case-insensitive) go through <c>ExcelInformationReader</c>; everything else goes through
/// <c>FileInformationReader</c>. When the detected column names fail validation, the first row is
/// marked as not being a header and every field receives a generated default name.
/// </summary>
/// <param name="request">Inspection settings; <c>request.FileInfo</c> identifies the file.</param>
/// <param name="logger">Logger passed to the delimited-file reader.</param>
/// <param name="processName">Not used by this method.</param>
/// <param name="entityName">Not used by this method.</param>
/// <returns>The file information, with generated field names when the detected ones are not valid.</returns>
public static FileInformation Create(FileInspectionRequest request, ILogger logger, string processName = null, string entityName = null) {
    // OrdinalIgnoreCase already ignores case, so a culture-sensitive ToLower() beforehand is unnecessary.
    var isExcel = request.FileInfo.Extension.StartsWith(".xls", StringComparison.OrdinalIgnoreCase);

    var fileInformation = isExcel
        ? new ExcelInformationReader(request).Read(request.FileInfo)
        : new FileInformationReader(request, logger).Read(request.FileInfo);

    var validator = new ColumnNameValidator(fileInformation.Fields.Select(f => f.Name).ToArray());
    if (validator.Valid()) {
        return fileInformation;
    }

    // The first row does not look like a header: treat it as data and generate names.
    fileInformation.FirstRowIsHeader = false;
    for (var i = 0; i < fileInformation.Fields.Count; i++) {
        fileInformation.Fields[i].Name = ColumnNameGenerator.CreateDefaultColumnName(i);
    }

    return fileInformation;
}
/// <summary>
/// Imports a file to an output using the provided file inspection settings
/// and reports only the row count of the result.
/// </summary>
/// <param name="request">Inspection settings identifying the file to import.</param>
/// <param name="output">Connection the import writes to.</param>
/// <returns>The <c>RowCount</c> of the result returned by <c>Import</c>.</returns>
public long ImportScaler(FileInspectionRequest request, TflConnection output) {
    var result = Import(request, output);
    return result.RowCount;
}
/// <summary>
/// Creates a reader that keeps the inspection request and logger for later use.
/// </summary>
/// <param name="request">Inspection settings stored on the reader.</param>
/// <param name="logger">Logger stored on the reader.</param>
public FileInformationReader(FileInspectionRequest request, ILogger logger) {
    this._logger = logger;
    this._request = request;
}
// Inspects a four-column CSV asking for int32, double and datetime detection and
// expects the columns to be typed string, int32, double and datetime in that order.
public void TestIssue001A() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, @"t1,t2,t3,t4 Monday,10,1.1,1/1/2014 Tuesday,11,2.2,2/1/2014 Wednesday,12,3.3,3/1/2014 Thursday,13,4.4,4/1/2014 Friday,14,5.5,5/1/2014 Saturday,15,6.6,6/1/2014");

    var request = new FileInspectionRequest(csvPath) {
        DataTypes = new List<string> { "int32", "double", "datetime" }
    };
    var information = FileInformationFactory.Create(request, new TestLogger());
    var inspector = new FieldInspector(new TestLogger());
    var fields = inspector.Inspect(information, request).ToArray();

    var expectedTypes = new[] { "string", "int32", "double", "datetime" };
    for (var i = 0; i < expectedTypes.Length; i++) {
        Assert.AreEqual(expectedTypes[i], fields[i].Type);
    }

    // The actual import (FileImporter.Import) is deliberately not run by this test.
}
// Inspects a CSV with quoted values, blank cells and an entirely empty last column,
// with IgnoreEmpty enabled, and checks the detected types, quoting and the empty column's length.
public void TestCsvBlanks() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, @"t1,t2,t3,t4,t5 ""Monday"",10,""1.1"",1/1/2014, ""Tuesday"",11,""2.2"",2/1/2014, ""Wednesday"",12,""3.3"",3/1/2014, ""Thursday"",13,""4.4"",4/1/2014, ""Friday"",14,,5/1/2014, ""Saturday"",15,,6/1/2014,");

    var request = new FileInspectionRequest(csvPath) {
        DataTypes = new List<string> { "int32", "double", "datetime" },
        IgnoreEmpty = true
    };
    var information = FileInformationFactory.Create(request, new TestLogger());
    var inspector = new FieldInspector(new TestLogger());
    var fields = inspector.Inspect(information, request).ToArray();

    // t1: quoted text
    Assert.AreEqual('"', fields[0].QuotedWith);
    Assert.AreEqual("string", fields[0].Type);

    // t2: integers
    Assert.AreEqual("int32", fields[1].Type);

    // t3: quoted doubles with blanks
    Assert.AreEqual('"', fields[2].QuotedWith);
    Assert.AreEqual("double", fields[2].Type);

    // t4: dates
    Assert.AreEqual("datetime", fields[3].Type);

    // t5: always empty
    Assert.AreEqual("string", fields[4].Type);
    Assert.AreEqual("1", fields[4].Length);
}
/// <summary>
/// Imports a file to an output using the provided file inspection settings
/// and reports only the row count of the result.
/// </summary>
/// <param name="request">Inspection settings identifying the file to import.</param>
/// <param name="output">Connection the import writes to.</param>
/// <returns>The <c>RowCount</c> of the result returned by <c>Import</c>.</returns>
public long ImportScaler(FileInspectionRequest request, TflConnection output) {
    var imported = Import(request, output);
    return imported.RowCount;
}
// Inspects the Issue002 workbook asking for int32 and datetime detection and
// expects the first four columns to be typed string, int32, string and datetime.
public void TestIssue002B() {
    const string workbookPath = @"TestFiles\Headers\Issue002.xlsx";

    var request = new FileInspectionRequest(workbookPath) {
        DataTypes = new List<string> { "int32", "datetime" }
    };
    var information = FileInformationFactory.Create(request, new TestLogger());
    var inspector = new FieldInspector(new TestLogger());
    var fields = inspector.Inspect(information, request).ToArray();

    var expectedTypes = new[] { "string", "int32", "string", "datetime" };
    for (var i = 0; i < expectedTypes.Length; i++) {
        Assert.AreEqual(expectedTypes[i], fields[i].Type);
    }
}
/// <summary>
/// Keeps the request, creates a <c>FileLineLoader</c> for the file's full path limited to
/// <c>request.LineLimit</c>, and records whether the file extension is ".csv" (case-insensitive).
/// </summary>
/// <param name="fileInfo">File to load lines from.</param>
/// <param name="request">Inspection settings; supplies the line limit.</param>
public LineLoader(FileSystemInfo fileInfo, FileInspectionRequest request) {
    _request = request;

    var path = fileInfo.FullName;
    var limit = request.LineLimit;
    _loader = new FileLineLoader(path, limit);

    var extension = fileInfo.Extension;
    _isCsv = extension.Equals(".csv", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Determines the type and length of every field by running a process over the file.
/// </summary>
/// <remarks>
/// The process reads the file through an "input" file connection into an "internal" output and
/// adds two kinds of calculated fields: one bool per (field, requested data type) produced by a
/// "typeconversion" transform, and one int32 per field produced by a "length" transform.
/// Afterwards, a field that is not empty in every row takes the first requested data type whose
/// bool is true in every row, and its length becomes <c>request.MinLength</c>. A field that is
/// still "string" gets the largest measured length plus one, limited by <c>request.MaxLength</c>
/// and <c>request.MinLength</c> when those are greater than zero.
/// </remarks>
/// <param name="fileInformation">Inspected file; its fields are updated in place.</param>
/// <param name="request">Inspection settings (data types, sample, length limits).</param>
/// <returns>The fields of <paramref name="fileInformation"/>.</returns>
public Fields Inspect(FileInformation fileInformation, FileInspectionRequest request) {

    var process = new TflProcess {
        Name = request.ProcessName,
        StarEnabled = false,
        ViewEnabled = false,
        PipelineThreading = "MultiThreaded"
    }.WithDefaults();

    var input = new TflConnection {
        Name = "input",
        Provider = "file",
        File = fileInformation.FileInfo.FullName,
        // A file with no detected delimiter is read with "|".
        Delimiter = fileInformation.Delimiter == default(char)
            ? "|"
            : fileInformation.Delimiter.ToString(CultureInfo.InvariantCulture),
        // Start on the second line when the first row is a header.
        Start = fileInformation.FirstRowIsHeader ? 2 : 1
    }.WithDefaults();
    var output = new TflConnection {
        Name = "output",
        Provider = "internal"
    }.WithDefaults();
    process.Connections = new List<TflConnection> { input, output };

    var sampled = new TflEntity {
        Name = request.EntityName,
        PrependProcessNameToOutputName = false,
        DetectChanges = false,
        Sample = System.Convert.ToInt32(request.Sample)
    }.WithDefaults();
    process.Entities.Add(sampled);

    var entity = process.Entities[0];

    // Plain input fields, one per detected column.
    foreach (var source in fileInformation.Fields) {
        var column = new TflField {
            Name = source.Name,
            Length = source.Length,
            Type = source.Type,
            QuotedWith = source.QuotedWith
        }.WithDefaults();
        entity.Fields.Add(column);
    }

    // One bool per (data type, field): does the value convert to that type?
    foreach (var dataType in request.DataTypes) {
        foreach (var field in fileInformation.Fields) {
            var flagName = IsDataTypeField(field.Name, dataType);
            var conversion = new TflTransform {
                Method = "typeconversion",
                Type = dataType,
                Parameter = field.Name,
                IgnoreEmpty = request.IgnoreEmpty
            }.WithDefaults();
            var flag = new TflField {
                Name = flagName,
                Type = "bool",
                Input = false,
                Transforms = new List<TflTransform> { conversion }
            }.WithDefaults();
            entity.CalculatedFields.Add(flag);
        }
    }

    // One int32 per field holding the value's length.
    foreach (var field in fileInformation.Fields) {
        var lengthName = LengthField(field.Name);
        var measure = new TflTransform {
            Method = "length",
            Parameter = field.Name
        }.WithDefaults();
        var measured = new TflField {
            Name = lengthName,
            Type = "int32",
            Transforms = new List<TflTransform> { measure }
        }.WithDefaults();
        entity.CalculatedFields.Add(measured);
    }

    var runner = ProcessFactory.CreateSingle(new TflRoot(process).Processes[0], _logger);
    var rows = runner.Execute().ToList();

    if (rows.Count == 0) {
        _logger.Warn("Nothing imported from in {0}!", fileInformation.FileInfo.Name);
        return fileInformation.Fields;
    }

    foreach (var field in fileInformation.Fields) {
        // Only fields with at least one non-empty value are candidates for a non-string type.
        var hasContent = rows.Any(row => !row[field.Name].Equals(string.Empty));
        if (hasContent) {
            foreach (var dataType in request.DataTypes) {
                var flagName = IsDataTypeField(field.Name, dataType);
                if (rows.All(row => row[flagName].Equals(true))) {
                    // First requested type that every row converts to wins.
                    field.Type = dataType;
                    field.Length = request.MinLength.ToString(CultureInfo.InvariantCulture);
                    break;
                }
            }
        }

        if (field.Type.Equals("string")) {
            var lengthName = LengthField(field.Name);
            var length = rows.Max(row => (int)row[lengthName]) + 1;
            if (request.MaxLength > 0 && length > request.MaxLength) {
                length = request.MaxLength;
            }
            if (request.MinLength > 0 && length < request.MinLength) {
                length = request.MinLength;
            }
            field.Length = length.ToString(CultureInfo.InvariantCulture);
        }
    }

    return fileInformation.Fields;
}
/// <summary>
/// Creates a reader that keeps the inspection request for later use.
/// </summary>
/// <param name="request">Inspection settings stored on the reader.</param>
public ExcelInformationReader(FileInspectionRequest request) {
    this._request = request;
}
// An empty CSV file is expected to yield no fields.
public void TestEmptyCsv() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, string.Empty);

    var inspection = new FileInspectionRequest(csvPath);
    var information = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(0, information.Fields.Count);
}
// A CSV containing only a header row is expected to yield three string fields named after
// the headers, each reported as quoted and with a length of "1024".
public void TestCsvWithJustHeaders() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, @"State,Population,Shape");

    var inspection = new FileInspectionRequest(csvPath);
    var actual = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(3, actual.Fields.Count);

    var expectedNames = new[] { "State", "Population", "Shape" };
    for (var i = 0; i < expectedNames.Length; i++) {
        Assert.AreEqual(expectedNames[i], actual.Fields[i].Name);
    }
    for (var i = 0; i < 3; i++) {
        Assert.AreEqual("string", actual.Fields[i].Type);
    }
    for (var i = 0; i < 3; i++) {
        Assert.IsTrue(actual.Fields[i].IsQuoted());
    }
    for (var i = 0; i < 3; i++) {
        Assert.AreEqual("1024", actual.Fields[i].Length);
    }
}
// A CSV whose Population values are quoted (they contain commas) is expected to yield three
// string fields, each reported as quoted with '"' on the second, and lengths of "1024".
public void TestFieldQuotedCsv() {
    // NOTE(review): GetTempFileName() creates a .tmp file that is orphaned by the rename; neither file is deleted.
    var csvPath = Path.GetTempFileName().Replace(".tmp", ".csv");
    File.WriteAllText(csvPath, @"State,Population,Shape MI,""10,000,000"",Mitten CA,""20,000,000"",Sock, KS,""9,000,000"",Rectangle");

    var inspection = new FileInspectionRequest(csvPath);
    var actual = FileInformationFactory.Create(inspection, new TestLogger());

    Assert.AreEqual(3, actual.Fields.Count);

    var expectedNames = new[] { "State", "Population", "Shape" };
    for (var i = 0; i < expectedNames.Length; i++) {
        Assert.AreEqual(expectedNames[i], actual.Fields[i].Name);
    }
    for (var i = 0; i < 3; i++) {
        Assert.AreEqual("string", actual.Fields[i].Type);
    }

    // Quoting checks keep their original order: the quote character is checked on the second field only.
    Assert.IsTrue(actual.Fields[0].IsQuoted());
    Assert.IsTrue(actual.Fields[1].IsQuoted());
    Assert.AreEqual('\"', actual.Fields[1].QuotedWith);
    Assert.IsTrue(actual.Fields[2].IsQuoted());

    for (var i = 0; i < 3; i++) {
        Assert.AreEqual("1024", actual.Fields[i].Length);
    }
}
/// <summary>
/// Determines the type and length of every field by running a process over the file.
/// </summary>
/// <remarks>
/// The process reads the file through an "input" file connection into an "internal" output and
/// adds two kinds of calculated fields: one bool per (field, requested data type) produced by a
/// "typeconversion" transform, and one int32 per field produced by a "length" transform.
/// Afterwards, a field that is not empty in every row takes the first requested data type whose
/// bool is true in every row, and its length becomes <c>request.MinLength</c>. A field that is
/// still "string" gets the largest measured length plus one, limited by <c>request.MaxLength</c>
/// and <c>request.MinLength</c> when those are greater than zero.
/// </remarks>
/// <param name="fileInformation">Inspected file; its fields are updated in place.</param>
/// <param name="request">Inspection settings (data types, sample, length limits).</param>
/// <returns>The fields of <paramref name="fileInformation"/>.</returns>
public Fields Inspect(FileInformation fileInformation, FileInspectionRequest request) {

    var process = new TflRoot().GetDefaultOf<TflProcess>(proc => {
        proc.Name = request.ProcessName;
        proc.StarEnabled = false;
        proc.ViewEnabled = false;
        proc.PipelineThreading = "MultiThreaded";
    });

    var input = process.GetDefaultOf<TflConnection>(conn => {
        conn.Name = "input";
        conn.Provider = "file";
        conn.File = fileInformation.FileInfo.FullName;
        // A file with no detected delimiter is read with "|".
        conn.Delimiter = fileInformation.Delimiter == default(char)
            ? "|"
            : fileInformation.Delimiter.ToString(CultureInfo.InvariantCulture);
        // Start on the second line when the first row is a header.
        conn.Start = fileInformation.FirstRowIsHeader ? 2 : 1;
    });
    var output = process.GetDefaultOf<TflConnection>(conn => {
        conn.Name = "output";
        conn.Provider = "internal";
    });
    process.Connections = new List<TflConnection> { input, output };

    var sampled = process.GetDefaultOf<TflEntity>(ent => {
        ent.Name = request.EntityName;
        ent.PrependProcessNameToOutputName = false;
        ent.DetectChanges = false;
        ent.Sample = System.Convert.ToInt32(request.Sample);
    });
    process.Entities.Add(sampled);

    var entity = process.Entities[0];

    // Plain input fields, one per detected column.
    foreach (var fd in fileInformation.Fields) {
        var field = fd; // local copy for the closure below
        var column = process.GetDefaultOf<TflField>(f => {
            f.Name = field.Name;
            f.Length = field.Length;
            f.Type = field.Type;
            f.QuotedWith = field.QuotedWith;
        });
        entity.Fields.Add(column);
    }

    // One bool per (data type, field): does the value convert to that type?
    foreach (var candidate in request.DataTypes) {
        var dataType = candidate; // local copy for the closure below
        foreach (var fd in fileInformation.Fields) {
            var field = fd; // local copy for the closure below
            var flagName = IsDataTypeField(field.Name, dataType);
            var flag = process.GetDefaultOf<TflField>(f => {
                f.Name = flagName;
                f.Type = "bool";
                f.Input = false;
                f.Transforms = new List<TflTransform> {
                    f.GetDefaultOf<TflTransform>(t => {
                        t.Method = "typeconversion";
                        t.Type = dataType;
                        t.Parameter = field.Name;
                        t.IgnoreEmpty = request.IgnoreEmpty;
                    })
                };
            });
            entity.CalculatedFields.Add(flag);
        }
    }

    // One int32 per field holding the value's length.
    foreach (var fd in fileInformation.Fields) {
        var field = fd; // local copy for the closure below
        var lengthName = LengthField(field.Name);
        var measured = process.GetDefaultOf<TflField>(f => {
            f.Name = lengthName;
            f.Type = "int32";
            f.Transforms = new List<TflTransform> {
                f.GetDefaultOf<TflTransform>(t => {
                    t.Method = "length";
                    t.Parameter = field.Name;
                })
            };
        });
        entity.CalculatedFields.Add(measured);
    }

    var runner = ProcessFactory.CreateSingle(new TflRoot(process).Processes[0], _logger);
    var rows = runner.Execute().ToList();

    if (rows.Count == 0) {
        _logger.Warn("Nothing imported from in {0}!", fileInformation.FileInfo.Name);
        return fileInformation.Fields;
    }

    foreach (var field in fileInformation.Fields) {
        // Only fields with at least one non-empty value are candidates for a non-string type.
        var hasContent = rows.Any(row => !row[field.Name].Equals(string.Empty));
        if (hasContent) {
            foreach (var dataType in request.DataTypes) {
                var flagName = IsDataTypeField(field.Name, dataType);
                if (rows.All(row => row[flagName].Equals(true))) {
                    // First requested type that every row converts to wins.
                    field.Type = dataType;
                    field.Length = request.MinLength.ToString(CultureInfo.InvariantCulture);
                    break;
                }
            }
        }

        if (field.Type.Equals("string")) {
            var lengthName = LengthField(field.Name);
            var length = rows.Max(row => (int)row[lengthName]) + 1;
            if (request.MaxLength > 0 && length > request.MaxLength) {
                length = request.MaxLength;
            }
            if (request.MinLength > 0 && length < request.MinLength) {
                length = request.MinLength;
            }
            field.Length = length.ToString(CultureInfo.InvariantCulture);
        }
    }

    return fileInformation.Fields;
}