/// <summary>
/// Builds and executes a single process that applies one aggregate function to every
/// field of <paramref name="result"/>, then returns the first row the process produces.
/// </summary>
/// <remarks>
/// Despite its name, this method does not return a builder; it runs the process and
/// returns a <see cref="Row"/>. Both the input and output connections use the
/// "internal" provider, and the rows already held by <paramref name="result"/> are
/// supplied as the input operation. A constant "group" field (not read from input,
/// default value "group", marked as the primary key) is added ahead of the profiled fields.
/// </remarks>
/// <param name="result">The imported data; its Name, Rows and Fields are read.</param>
/// <param name="aggregate">Aggregate function name; also used as the star and entity name.</param>
/// <param name="distinct">Passed to each field's Distinct setting.</param>
/// <returns>The first row of the executed process.</returns>
private static Row GetBuilder(Result result, string aggregate, bool distinct) {
    // Capitalize with the invariant culture so the generated process name does not
    // depend on the current thread culture (e.g. Turkish maps 'i' to 'İ').
    // A null or empty name falls back to the bare "Dp" prefix instead of throwing.
    var name = result.Name;
    var processName = string.IsNullOrEmpty(name)
        ? "Dp"
        : "Dp" + name[0].ToString(CultureInfo.InvariantCulture).ToUpperInvariant() + name.Substring(1);

    var builder = new ProcessBuilder(processName)
        .Star(aggregate)
        .StarEnabled(false)
        .Connection("input")
            .Provider("internal")
        .Connection("output")
            .Provider("internal")
        .Entity(aggregate)
            .DetectChanges(false)
            .InputOperation(new RowsOperation(result.Rows))
            .Group()
            .Field("group")
                .Input(false)
                .Default("group")
                .Aggregate("group")
                .PrimaryKey();

    // Register every profiled field with the requested aggregate.
    foreach (var field in result.Fields) {
        builder
            .Field(field.Name)
            .Length(field.Length)
            .Type(field.Type)
            .Aggregate(aggregate)
            .Distinct(distinct);
    }

    return ProcessFactory.CreateSingle(builder.Process()).Execute().First();
}
/// <summary>
/// Imports data from the resource described by <paramref name="resource"/> using the
/// "input" connection of the configured "DataProfiler" process.
/// </summary>
/// <remarks>
/// The effective sample is <paramref name="sample"/> when it lies strictly between 0 and 100;
/// otherwise it is the first configured entity's Sample, or 100 when no entity is configured.
/// NoLock is true when no entity is configured, otherwise it is the first entity's NoLock.
/// </remarks>
/// <param name="resource">Resource specification handed to <c>ConnectionModifier</c>.</param>
/// <param name="sample">Requested sample percentage; see remarks for how it is resolved.</param>
/// <returns>
/// A <see cref="Result"/> holding the entity's fields, the rows produced by executing the
/// process, the provider name, and server/database/schema/table/port properties.
/// </returns>
/// <exception cref="DataProfilerException">
/// Thrown when anything fails while loading the "DataProfiler" process or its "input" connection.
/// </exception>
public Result Import(string resource, decimal sample = 100m) {
    AbstractConnection input;
    bool noLock;
    ConnectionModifier modifier;

    try {
        var userDefined = ProcessFactory.CreateSingle("DataProfiler", new Options() { Mode = "metadata"});
        input = userDefined.Connections["input"];
        modifier = new ConnectionModifier(resource);
        modifier.Modify(ref input);

        var hasEntity = userDefined.Entities.Any();
        sample = sample > 0m && sample < 100m ? sample : hasEntity ? userDefined.Entities[0].Sample : 100m;
        noLock = !hasEntity || userDefined.Entities[0].NoLock;
        TflLogger.Info(userDefined.Name, modifier.Name, "Sample: {0:###} percent, NoLock: {1}", sample, noLock);
    } catch {
        // NOTE(review): this discards the original exception, so unrelated failures are
        // reported as a configuration problem. Consider preserving the cause if
        // DataProfilerException offers an inner-exception constructor.
        throw new DataProfilerException("You must define a DataProfiler process with an 'input' connection in the transformalize configuration section.");
    }

    // Keep only ASCII letters for the process name. A name without any letters
    // (e.g. "123") leaves nothing to capitalize, so fall back to the bare prefix
    // rather than indexing into an empty string.
    var cleanName = Regex.Replace(modifier.Name, "[^a-zA-Z]", string.Empty);
    var processName = cleanName.Length == 0
        ? "Dp"
        // Invariant casing keeps the name stable regardless of the current culture.
        : "Dp" + cleanName[0].ToString(CultureInfo.InvariantCulture).ToUpperInvariant() + cleanName.Substring(1);

    var builder = new ProcessBuilder(processName)
        .Connection("input")
            .Provider(input.Type)
            .Server(input.Server)
            .Database(input.Database)
            .User(input.User)
            .Password(input.Password)
            .Port(input.Port)
            .ConnectionString(input.GetConnectionString())
        .Connection("output")
            .Provider("internal")
        .Entity(modifier.Name)
            .DetectChanges(false)
            .NoLock(noLock)
            .Sample(sample)
            .Schema(modifier.Schema);

    var process = ProcessFactory.CreateSingle(builder.Process());

    var result = new Result {
        Name = modifier.Name,
        Fields = process.Entities[0].Fields,
        Rows = process.Execute(),
        Provider = input.Type.ToString()
    };
    result.Properties["server"] = input.Server;
    result.Properties["database"] = input.Database;
    result.Properties["schema"] = modifier.Schema;
    result.Properties["table"] = modifier.Name;
    result.Properties["port"] = input.Port.ToString(CultureInfo.InvariantCulture);

    return result;
}
/// <summary>
/// Imports a delimited file using the file-inspection settings of the "DataProfiler" configuration.
/// </summary>
/// <param name="file">Path of the file to import.</param>
/// <param name="sample">
/// Sample percentage. A value strictly between 0 and 100 overrides the configured
/// inspection sample; any other value (including the default of 100) keeps the configured one.
/// </param>
/// <returns>
/// A <see cref="Result"/> holding the detected fields, the imported rows, the provider "file",
/// and "filename"/"delimiter" properties.
/// </returns>
public Result Import(string file, decimal sample = 100m) {
    var fileInspection = new ConfigurationFactory("DataProfiler").CreateSingle().FileInspection.GetInspectionRequest();

    // Both bounds must hold. The previous "||" was always true, so the configured
    // sample was unconditionally overwritten by the argument.
    fileInspection.Sample = sample > 0m && sample < 100m ? sample : fileInspection.Sample;

    var response = new FileImporter().Import(
        new FileInfo(file),
        fileInspection,
        new ConnectionConfigurationElement() { Name = "output", Provider = "internal" }
    );

    var result = new Result {
        Fields = response.Information.Fields,
        Rows = response.Rows,
        Provider = "file"
    };
    result.Properties["filename"] = response.Information.FileInfo.FullName;
    result.Properties["delimiter"] = response.Information.Delimiter.ToString(CultureInfo.InvariantCulture);

    return result;
}
/// <summary>
/// Produces one profile row per field of <paramref name="result"/>, keyed by field name.
/// </summary>
/// <remarks>
/// Each row starts with "field", "type" and a 1-based "index" reflecting enumeration order,
/// and is then extended with the "min", "max", "minlength", "maxlength" and (distinct) "count"
/// aggregates via <c>AddToProfile</c>.
/// </remarks>
/// <param name="result">The imported data to profile.</param>
/// <returns>A dictionary mapping each field name to its profile row.</returns>
public Dictionary<string, Row> Profile(Result result) {
    // Aggregate name -> whether the aggregate is computed over distinct values.
    var distinctByAggregate = new Dictionary<string, bool> {
        { "min", false },
        { "max", false },
        { "minlength", false },
        { "maxlength", false },
        { "count", true }
    };

    // Running counter captured by the lambda below; incremented once per field.
    var position = 0;
    var profile = result.Fields.ToDictionary(
        f => f.Name,
        f => new Row() {
            { "field", f.Name },
            { "type", f.Type },
            { "index", ++position }
        }
    );

    foreach (var entry in distinctByAggregate) {
        AddToProfile(ref profile, result, entry.Key, entry.Value);
    }

    return profile;
}
/// <summary>
/// Computes one aggregate over all fields and stores each value in the matching profile row.
/// </summary>
/// <param name="profile">Profile rows keyed by field name; updated in place.</param>
/// <param name="result">The imported data the aggregate is computed from.</param>
/// <param name="aggregate">Aggregate name; also the key written into each profile row.</param>
/// <param name="distinct">Forwarded to <c>GetBuilder</c>.</param>
private static void AddToProfile(ref Dictionary<string, Row> profile, Result result, string aggregate, bool distinct) {
    var aggregated = GetBuilder(result, aggregate, distinct);

    foreach (var column in aggregated.Columns) {
        // The "group" column is not a profiled field, so it is skipped.
        if (column.Equals("group")) {
            continue;
        }

        profile[column][aggregate] = aggregated[column];
    }
}