/// <summary>
/// Returns the folder path configured on the Azure Blob dataset with the given name.
/// </summary>
/// <param name="datasetName">Name of the dataset to look up in <c>Datasets</c>.</param>
/// <returns>The <c>FolderPath</c> of the dataset's <see cref="AzureBlobDataset"/> type properties.</returns>
public string GetBlobFolderPath(string datasetName)
{
    // Single() throws unless exactly one dataset carries the requested name.
    var matchingDataset = Datasets.Single(candidate => candidate.Name == datasetName);

    // Direct cast: type properties of any other kind raise InvalidCastException here.
    var blobProperties = (AzureBlobDataset)matchingDataset.Properties.TypeProperties;

    return blobProperties.FolderPath;
}
/// <summary>
/// Writes the folder, format, partitions, file name, container name and directory name
/// of a blob dataset to the activity log.
/// </summary>
/// <param name="blobDataset">Blob dataset whose settings are logged.</param>
/// <param name="logger">Logger that receives the messages.</param>
/// <exception cref="Exception">
/// Thrown when the folder path is null or contains no '/' after its first character,
/// because no container name can be derived from it.
/// </exception>
private static void LogBlobDataSetInfo(AzureBlobDataset blobDataset, IActivityLogger logger)
{
    logger.Write("\n******** Blob Storage info ********");

    // Dataset text is passed as a format argument rather than concatenated into the
    // format string, so braces in a value (for example a partitioned folder path such
    // as "container/{Year}") cannot be misread as placeholders.
    logger.Write("\nBlob folder: {0}", blobDataset.FolderPath);
    logger.Write("\nBlob format: {0}", blobDataset.Format);

    var partitions = blobDataset.PartitionedBy;
    int partitionCount = partitions?.Count ?? 0;
    logger.Write("\nPartitions ({0}):", partitionCount);
    for (int i = 0; i < partitionCount; i++)
    {
        // A null entry is logged as "null" instead of raising NullReferenceException.
        var partition = partitions[i];
        logger.Write("\n\t{0}: {1}", partition?.Name ?? "null", partition?.Value);
    }

    logger.Write("\nBlob file: {0}", blobDataset.FileName);

    // The container is the segment before the first '/', so the separator must exist
    // and must not be the first character.
    string folderPath = blobDataset.FolderPath;
    if (folderPath == null || folderPath.IndexOf("/", StringComparison.Ordinal) <= 0)
    {
        throw new Exception($"Can't find container name for dataset '{blobDataset.FolderPath}'");
    }

    logger.Write("\nContainer Name: {0}", GetContainerName(folderPath));
    logger.Write("\nDirectory Name: {0}", GetDirectoryName(folderPath));
}
/// <summary>
/// Loads the activity's first input dataset into a <see cref="System.Data.DataTable"/>.
/// </summary>
/// <remarks>
/// Only two linked service types are handled: "AzureStorage" (the blob is read as
/// comma-delimited text) and "AzureSqlDatabase" (the whole table is selected).
/// </remarks>
/// <param name="dnActivity">Activity whose first input is read.</param>
/// <param name="linkedServices">Linked services available to the activity.</param>
/// <param name="datasets">Datasets available to the activity.</param>
/// <returns>The table obtained from <c>GetInputDatatableShell</c>, filled with the input rows.</returns>
/// <exception cref="NotImplementedException">The input linked service is of any other type.</exception>
/// <exception cref="InvalidOperationException">The input dataset's type properties do not match the linked service type.</exception>
public static System.Data.DataTable GetInputDatatable(Activity dnActivity, IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets)
{
    // SQL or Azure Blob CSV only
    var inLS = LinkedServiceHelper.GetInputLinkedService(dnActivity, linkedServices, datasets);
    System.Data.DataTable dsRtn = GetInputDatatableShell(dnActivity, linkedServices, datasets);

    // Figure out which type
    switch (inLS.Properties.Type)
    {
        case "AzureStorage":
        {
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(((AzureStorageLinkedService)inLS.Properties.TypeProperties).ConnectionString);

            string blobInputName = dnActivity.Inputs.First().Name;
            AzureBlobDataset abdInput = datasets.Single(d => d.Name == blobInputName).Properties.TypeProperties as AzureBlobDataset;
            if (abdInput == null)
            {
                throw new InvalidOperationException($"Input dataset '{blobInputName}' is not an Azure Blob dataset.");
            }

            // Pass the account credentials along with the URI; a blob reference built
            // from the URI alone is anonymous and cannot read a private container.
            Uri blobUri = new Uri(inputStorageAccount.BlobEndpoint.AbsoluteUri + abdInput.FolderPath + "/" + abdInput.FileName);
            CloudBlockBlob cbbInputFile = new CloudBlockBlob(blobUri, inputStorageAccount.Credentials);

            using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
            {
                cbbInputFile.DownloadToStream(ms);
                ms.Position = 0; // rewind so the parser starts at the first byte

                using (Microsoft.VisualBasic.FileIO.TextFieldParser tfp = new Microsoft.VisualBasic.FileIO.TextFieldParser(ms))
                {
                    tfp.TextFieldType = Microsoft.VisualBasic.FileIO.FieldType.Delimited;
                    tfp.SetDelimiters(",");
                    while (!tfp.EndOfData)
                    {
                        string[] fields = tfp.ReadFields();
                        dsRtn.LoadDataRow(fields, true);
                    }
                }
            }

            break;
        }

        case "AzureSqlDatabase":
        {
            string sqlInputName = dnActivity.Inputs.First().Name;
            AzureSqlTableDataset astInput = datasets.Single(d => d.Name == sqlInputName).Properties.TypeProperties as AzureSqlTableDataset;
            if (astInput == null)
            {
                throw new InvalidOperationException($"Input dataset '{sqlInputName}' is not an Azure SQL table dataset.");
            }

            string sqlConnectionString = ((AzureSqlDatabaseLinkedService)inLS.Properties.TypeProperties).ConnectionString;

            // Connection, command and adapter are disposed deterministically.
            using (System.Data.SqlClient.SqlConnection scInput = new System.Data.SqlClient.SqlConnection(sqlConnectionString))
            using (System.Data.SqlClient.SqlCommand commInput = new System.Data.SqlClient.SqlCommand())
            {
                commInput.Connection = scInput;
                commInput.CommandType = System.Data.CommandType.Text;
                commInput.CommandText = string.Format("SELECT * FROM [{0}]", astInput.TableName);

                using (System.Data.SqlClient.SqlDataAdapter sdaInput = new System.Data.SqlClient.SqlDataAdapter(commInput))
                {
                    sdaInput.Fill(dsRtn);
                }
            }

            break;
        }

        default:
            throw new NotImplementedException();
    }

    return dsRtn;
}
/// <summary>
/// Creates a <see cref="BlobUtilities"/> for the named blob dataset, using the connection
/// string of the dataset's linked service.
/// </summary>
/// <param name="datasetName">Name of the dataset to look up in <c>Datasets</c>.</param>
/// <returns>
/// A <see cref="BlobUtilities"/> built from <c>Logger</c>, the connection string (null when the
/// linked service is not an <see cref="AzureStorageLinkedService"/>) and the dataset's folder path.
/// </returns>
public BlobUtilities GetBlob(string datasetName)
{
    // Exactly one dataset must match; its type properties must be blob properties.
    var target = Datasets.Single(candidate => candidate.Name == datasetName);
    var blobProperties = (AzureBlobDataset)target.Properties.TypeProperties;

    // The first linked service carrying the dataset's linked service name wins.
    var owningService = LinkedServices.First(service => service.Name == target.Properties.LinkedServiceName);
    var storageProperties = owningService.Properties.TypeProperties as AzureStorageLinkedService;

    // A non-storage linked service yields a null connection string rather than an error.
    string connectionString = storageProperties == null ? null : storageProperties.ConnectionString;

    return new BlobUtilities(Logger, connectionString, blobProperties.FolderPath);
}
/// <summary>
/// For each named dataset, deletes the blob identified by the dataset's folder path and
/// file name, then deletes every block blob listed under the dataset's folder.
/// </summary>
/// <param name="dataSetsToDelete">Names of the Azure Blob datasets to clean up.</param>
/// <exception cref="InvalidOperationException">
/// A name matches no dataset, the dataset is not an Azure Blob dataset, or its linked
/// service is not an Azure Storage linked service.
/// </exception>
public void DeleteBlobFileFolder(List<string> dataSetsToDelete)
{
    foreach (string strInputToDelete in dataSetsToDelete)
    {
        Dataset inputDataset = datasets.First(ds => ds.Name.Equals(strInputToDelete));
        AzureBlobDataset blobDataset = inputDataset.Properties.TypeProperties as AzureBlobDataset;
        if (blobDataset == null)
        {
            throw new InvalidOperationException($"Dataset '{strInputToDelete}' is not an Azure Blob dataset.");
        }

        string folderPath = blobDataset.FolderPath;
        string fileName = blobDataset.FileName;

        // Values are passed as format arguments so braces in a path are logged verbatim.
        logger.Write("\nBlob folder: {0}", folderPath);
        logger.Write("\nBlob file: {0}", fileName);

        // linked service for input and output is the same.
        AzureStorageLinkedService linkedService = linkedServices.First(ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
        if (linkedService == null)
        {
            throw new InvalidOperationException($"Linked service '{inputDataset.Properties.LinkedServiceName}' is not an Azure Storage linked service.");
        }

        // create storage client for input. Pass the connection string.
        CloudStorageAccount storageAccount = CloudStorageAccount.Parse(linkedService.ConnectionString);
        CloudBlobClient client = storageAccount.CreateCloudBlobClient();

        // Join folder and file name with exactly one '/'. Plain concatenation of
        // "container/folder" and "file.csv" would address "container/folderfile.csv".
        string blobPath = folderPath + fileName;
        if (!string.IsNullOrEmpty(folderPath) && !string.IsNullOrEmpty(fileName) && !folderPath.EndsWith("/", StringComparison.Ordinal))
        {
            blobPath = folderPath + "/" + fileName;
        }

        // find blob to delete and delete if exists.
        Uri blobUri = new Uri(storageAccount.BlobEndpoint, blobPath);
        CloudBlockBlob blob = new CloudBlockBlob(blobUri, storageAccount.Credentials);
        logger.Write("Blob Uri: {0}", blobUri.AbsoluteUri);
        logger.Write("Blob exists: {0}", blob.Exists());
        blob.DeleteIfExists();
        logger.Write("Deleted blob: {0}", blobUri.AbsoluteUri);

        // The folder sweep only runs when the path has a container segment
        // followed by '/', i.e. the separator is not the first character.
        int separatorIndex = folderPath == null ? -1 : folderPath.IndexOf('/');
        if (separatorIndex > 0)
        {
            string containerName = folderPath.Substring(0, separatorIndex);
            logger.Write("Container Name {0}", containerName);
            string directoryName = folderPath.Substring(separatorIndex + 1);
            logger.Write("Directory Name {0}", directoryName);

            // Ensure the container exists before listing it.
            var blobContainer = client.GetContainerReference(containerName);
            blobContainer.CreateIfNotExists();

            // Flat listing: every blob whose name starts with the directory name.
            foreach (IListBlobItem item in blobContainer.ListBlobs(directoryName, true))
            {
                logger.Write("Blob Uri: {0} ", item.Uri.AbsoluteUri);
                if (item is CloudBlockBlob)
                {
                    CloudBlockBlob subBlob = new CloudBlockBlob(item.Uri, storageAccount.Credentials);
                    logger.Write("Blob exists: {0}", subBlob.Exists());
                    subBlob.DeleteIfExists();
                    logger.Write("Deleted blob {0}", item.Uri.AbsoluteUri);
                }
            }
        }
    }
}
/// <summary>
/// Reads the folder path from a dataset's Azure Blob type properties.
/// </summary>
/// <param name="dataArtifact">Input or output dataset; may be null.</param>
/// <returns>
/// The folder path, or null when the dataset or its properties are null or its type
/// properties are not an <see cref="AzureBlobDataset"/>.
/// </returns>
private static string GetFolderPath(Dataset dataArtifact)
{
    // Each null-conditional step short-circuits to null, as does a failed 'as' cast.
    var blobProperties = dataArtifact?.Properties?.TypeProperties as AzureBlobDataset;
    return blobProperties?.FolderPath;
}
/// <summary>
/// Reads the file name from a dataset's Azure Blob type properties.
/// </summary>
/// <param name="dataArtifact">Input or output dataset; may be null.</param>
/// <returns>
/// The blob file name, or null when the dataset or its properties are null or its type
/// properties are not an <see cref="AzureBlobDataset"/>.
/// </returns>
private static string GetFileName(Dataset dataArtifact)
{
    // A missing dataset, missing properties or a non-blob dataset all collapse to null.
    var blobProperties = dataArtifact?.Properties?.TypeProperties as AzureBlobDataset;
    return blobProperties?.FileName;
}
/// <summary>
/// Extracts the blob folder path from an input or output dataset.
/// </summary>
/// <param name="dataArtifact">Dataset to inspect; may be null.</param>
/// <returns>
/// The folder path held by the dataset's <see cref="AzureBlobDataset"/> type properties,
/// or null when the dataset, its properties, or blob type properties are not available.
/// </returns>
private static string GetFolderPath(Dataset dataArtifact)
{
    // Nothing to read without a dataset and its properties.
    var properties = dataArtifact == null ? null : dataArtifact.Properties;
    if (properties == null)
    {
        return null;
    }

    // Only blob datasets carry a folder path; any other kind yields null.
    var blobProperties = properties.TypeProperties as AzureBlobDataset;
    return blobProperties == null ? null : blobProperties.FolderPath;
}
/// <summary>
/// Custom activity entry point. Reads the comma-separated dataset names in the
/// "InputToDelete" extended property and, for each one, deletes the dataset's blob and
/// every block blob listed under the dataset's folder.
/// </summary>
/// <param name="linkedServices">Linked services available to the activity.</param>
/// <param name="datasets">Datasets available to the activity.</param>
/// <param name="activity">Activity whose type properties are <see cref="DotNetActivity"/>.</param>
/// <param name="logger">Logger that receives progress and error messages.</param>
/// <returns>An empty dictionary.</returns>
/// <exception cref="Exception">
/// Any failure is logged and rethrown as an <see cref="Exception"/> carrying the same
/// message, with the original exception attached as <c>InnerException</c>.
/// </exception>
public IDictionary<string, string> Execute(IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, Activity activity, IActivityLogger logger)
{
    try
    {
        logger.Write("Custom Activity Started.");

        DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
        string inputToDelete = dotNetActivity.ExtendedProperties["InputToDelete"];

        // Dynamic text is passed as a format argument so braces in it are logged verbatim.
        logger.Write("\nInput to delete is {0}", inputToDelete);
        logger.Write("\nAll Dataset(s) Below ");
        foreach (Dataset ds in datasets)
        {
            logger.Write("\nDataset: {0}", ds.Name);
        }

        foreach (string inputName in activity.Inputs.Select(i => i.Name))
        {
            logger.Write("\nInput Dataset: {0}", inputName);
        }

        foreach (string outputName in activity.Outputs.Select(o => o.Name))
        {
            logger.Write("\nOutput Dataset: {0}", outputName);
        }

        // Tolerate "a, b" and stray commas: trim each entry and drop empty ones.
        List<string> dataSetsToDelete = inputToDelete
            .Split(',')
            .Select(entry => entry.Trim())
            .Where(entry => entry.Length > 0)
            .ToList();

        foreach (string strInputToDelete in dataSetsToDelete)
        {
            DeleteDatasetBlobs(strInputToDelete, linkedServices, datasets, logger);
        }

        logger.Write("Custom Activity Ended Successfully.");
    }
    catch (Exception e)
    {
        logger.Write("Custom Activity Failed with error.");
        logger.Write("Caught exception: ");

        // The message is an argument, not the format string: braces in an exception
        // message must not trigger a second failure while reporting the first.
        logger.Write("{0}", e.Message);

        // Same exception type and message as before, but the original exception
        // (and its stack trace) now travels along as InnerException.
        throw new Exception(e.Message, e);
    }

    // The dictionary can be used to chain custom activities together in the future.
    // This feature is not implemented yet, so just return an empty dictionary.
    return new Dictionary<string, string>();
}

/// <summary>
/// Deletes the blob named by one dataset, then every block blob listed under the dataset's folder.
/// </summary>
private static void DeleteDatasetBlobs(string datasetName, IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, IActivityLogger logger)
{
    Dataset inputDataset = datasets.First(candidate => candidate.Name.Equals(datasetName));
    AzureBlobDataset blobDataset = inputDataset.Properties.TypeProperties as AzureBlobDataset;
    if (blobDataset == null)
    {
        throw new InvalidOperationException($"Dataset '{datasetName}' is not an Azure Blob dataset.");
    }

    string folderPath = blobDataset.FolderPath;
    string fileName = blobDataset.FileName;
    logger.Write("\nBlob folder: {0}", folderPath);
    logger.Write("\nBlob file: {0}", fileName);

    // linked service for input and output is the same.
    AzureStorageLinkedService linkedService = linkedServices.First(ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    if (linkedService == null)
    {
        throw new InvalidOperationException($"Linked service '{inputDataset.Properties.LinkedServiceName}' is not an Azure Storage linked service.");
    }

    // create storage client for input. Pass the connection string.
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(linkedService.ConnectionString);
    CloudBlobClient client = storageAccount.CreateCloudBlobClient();

    // Join folder and file name with exactly one '/'. Plain concatenation of
    // "container/folder" and "file.csv" would address "container/folderfile.csv".
    string blobPath = folderPath + fileName;
    if (!string.IsNullOrEmpty(folderPath) && !string.IsNullOrEmpty(fileName) && !folderPath.EndsWith("/", StringComparison.Ordinal))
    {
        blobPath = folderPath + "/" + fileName;
    }

    // find blob to delete and delete if exists.
    Uri blobUri = new Uri(storageAccount.BlobEndpoint, blobPath);
    CloudBlockBlob blob = new CloudBlockBlob(blobUri, storageAccount.Credentials);
    logger.Write("Blob Uri: {0}", blobUri.AbsoluteUri);
    logger.Write("Blob exists: {0}", blob.Exists());
    blob.DeleteIfExists();
    logger.Write("Deleted blob: {0}", blobUri.AbsoluteUri);

    // The folder sweep only runs when the path has a container segment followed by '/'.
    int separatorIndex = folderPath == null ? -1 : folderPath.IndexOf('/');
    if (separatorIndex <= 0)
    {
        return;
    }

    string containerName = folderPath.Substring(0, separatorIndex);
    logger.Write("Container Name {0}", containerName);
    string directoryName = folderPath.Substring(separatorIndex + 1);
    logger.Write("Directory Name {0}", directoryName);

    // Ensure the container exists before listing it.
    var blobContainer = client.GetContainerReference(containerName);
    blobContainer.CreateIfNotExists();

    // Flat listing: every blob whose name starts with the directory name.
    foreach (IListBlobItem item in blobContainer.ListBlobs(directoryName, true))
    {
        logger.Write("Blob Uri: {0} ", item.Uri.AbsoluteUri);
        if (item is CloudBlockBlob)
        {
            CloudBlockBlob subBlob = new CloudBlockBlob(item.Uri, storageAccount.Credentials);
            logger.Write("Blob exists: {0}", subBlob.Exists());
            subBlob.DeleteIfExists();
            logger.Write("Deleted blob {0}", item.Uri.AbsoluteUri);
        }
    }
}
/// <summary>
/// Builds a <see cref="DotNetActivityContext"/> for one activity from the Data Factory
/// JSON files on disk: the pipeline file, one JSON file per input/output dataset, one
/// per linked service those datasets name, and the activity's own linked service file.
/// </summary>
/// <param name="pipelineFileName">Pipeline JSON file name, relative to <paramref name="adfFilesPath"/>.</param>
/// <param name="activityName">Name of the activity to pick from the pipeline.</param>
/// <param name="configFile">
/// Optional deployment config file, relative to <paramref name="adfFilesPath"/>. Its top-level
/// properties are keyed by artifact name and hold name/value pairs, where the name is a
/// JSON path into the artifact and the value replaces the token at that path.
/// </param>
/// <param name="adfFilesPath">Folder containing the Data Factory JSON files.</param>
/// <returns>A context holding the activity, its datasets, its linked services and a logger.</returns>
/// <exception cref="InvalidOperationException">A deployment config path does not exist in the targeted JSON document.</exception>
/// <exception cref="Exception">
/// The activity cannot be selected from the pipeline, a dataset has an unexpected type,
/// or a linked service type has no mapper.
/// </exception>
public static DotNetActivityContext DeserializeActivity(string pipelineFileName, string activityName, string configFile = null, string adfFilesPath = @"..\..\..\McioppDataFactory")
{
    // Get Key Vault settings if secure publish is being used on the local machine
    AdfFileHelper adfFileHelper = null;
    string settingsFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), "SecurePublishSettings.json");
    if (File.Exists(settingsFile))
    {
        AppSettings settings = JsonConvert.DeserializeObject<AppSettings>(File.ReadAllText(settingsFile));
        X509Certificate2 cert = KeyVaultResolver.FindCertificateByThumbprint(settings.KeyVaultCertThumbprint);

        // Only the first environment entry is consulted.
        var environment = settings.EnvironmentSettings.First();
        string suffix = environment.KeyVaultDnsSuffix;
        suffix = string.IsNullOrEmpty(suffix) ? "vault.azure.net:443" : suffix;

        KeyVaultResolver keyVaultResolver = new KeyVaultResolver(environment.KeyVaultName, suffix, settings.KeyVaultCertClientId, cert);
        adfFileHelper = new AdfFileHelper(keyVaultResolver, new Logger());
    }

    adfFilesPath = Path.GetFullPath(adfFilesPath);

    var deploymentDict = new Dictionary<string, Dictionary<string, string>>();
    if (!string.IsNullOrEmpty(configFile))
    {
        // Get deployment config
        string deploymentConfigPath = Path.Combine(adfFilesPath, configFile);
        var deploymentConfigJson = File.ReadAllText(deploymentConfigPath);
        var deploymentJObj = JObject.Parse(deploymentConfigJson);
        deploymentDict = deploymentJObj.Properties()
            .ToDictionary(x => x.Name,
                          y => y.Value.ToDictionary(z => z["name"].ToString(), z => z["value"].ToString()));
    }

    DotNetActivityContext context = new DotNetActivityContext
    {
        LinkedServices = new List<LinkedService>(),
        Datasets = new List<Dataset>(),
        Activity = new Activity(),
        Logger = new ActivityLogger()
    };

    string pipelinePath = Path.Combine(adfFilesPath, pipelineFileName);
    string pipelineJson = File.ReadAllText(pipelinePath);
    string pipelineName = Path.GetFileNameWithoutExtension(pipelineFileName);

    // Update with values from deployment config if exists
    Dictionary<string, string> pipelineOverrides;
    if (deploymentDict.TryGetValue(pipelineName, out pipelineOverrides))
    {
        JObject pipelineJObject = JObject.Parse(pipelineJson);
        ApplyDeploymentOverrides(pipelineJObject, pipelineOverrides, pipelineName);
        pipelineJson = pipelineJObject.ToString();
    }

    // Search for Key Vault references in the pipeline and replace with their Key Vault equivalents if found
    if (adfFileHelper != null)
    {
        // NOTE(review): .Result blocks on the resolver's task; presumably failures
        // surface wrapped in AggregateException - confirm against ResolveKeyVault.
        pipelineJson = adfFileHelper.ResolveKeyVault(pipelineJson).Result;
    }

    var dummyPipeline = JsonConvert.DeserializeObject<Models.Pipeline>(pipelineJson);

    Models.Activity dummyActivity;
    try
    {
        dummyActivity = dummyPipeline.Properties.Activities.Single(x => x.Name == activityName);
    }
    catch (InvalidOperationException)
    {
        // Single() throws this for zero matches as well as for more than one match.
        throw new Exception($"Activity {activityName} not found in {pipelinePath}.");
    }

    context.Activity.Name = dummyActivity.Name;
    context.Activity.TypeProperties = new DotNetActivity();
    DotNetActivity dotNetActivity = (DotNetActivity)context.Activity.TypeProperties;
    dotNetActivity.ExtendedProperties = dummyActivity.DotNetActivityTypeProperties.ExtendedProperties;

    // get the input and output tables
    var dummyDatasets = new HashSet<Models.ActivityData>();
    dummyDatasets.UnionWith(dummyActivity.Inputs);
    dummyDatasets.UnionWith(dummyActivity.Outputs);

    var dummyServices = new HashSet<Models.LinkedService>();

    // init the data tables
    foreach (var dummyDataset in dummyDatasets)
    {
        // parse the table json source
        var dataPath = Path.Combine(adfFilesPath, dummyDataset.Name + ".json");
        var dataJson = File.ReadAllText(dataPath);
        var dummyTable = JsonConvert.DeserializeObject<Models.Table>(dataJson);

        // initialize dataset properties
        DatasetTypeProperties datasetProperties;
        switch (dummyTable.Properties.Type)
        {
            case "AzureBlob":
                // init the azure model
                datasetProperties = new AzureBlobDataset
                {
                    FolderPath = dummyTable.Properties.TypeProperties.FolderPath,
                    FileName = dummyTable.Properties.TypeProperties.FileName
                };
                break;

            case "AzureTable":
            case "AzureSqlTable":
                datasetProperties = new AzureTableDataset
                {
                    TableName = dummyTable.Properties.TypeProperties.TableName
                };
                break;

            case "SqlServerTable":
                datasetProperties = new SqlServerTableDataset(dummyTable.Properties.TypeProperties.TableName);
                break;

            default:
                throw new Exception($"Unexpected Dataset.Type {dummyTable.Properties.Type}");
        }

        // initialize dataset
        var dataDataset = new Dataset(
            dummyDataset.Name,
            new DatasetProperties(datasetProperties, new Availability(), string.Empty));
        dataDataset.Properties.LinkedServiceName = dummyTable.Properties.LinkedServiceName;
        context.Datasets.Add(dataDataset);

        // register the inputs and outputs in the activity
        if (dummyDataset is Models.ActivityInput)
        {
            context.Activity.Inputs.Add(new ActivityInput(dummyDataset.Name));
        }

        if (dummyDataset is Models.ActivityOutput)
        {
            context.Activity.Outputs.Add(new ActivityOutput(dummyDataset.Name));
        }

        // parse the linked service json source for later use
        string linkedServiceName = dummyTable.Properties.LinkedServiceName;
        var servicePath = Path.Combine(adfFilesPath, linkedServiceName + ".json");
        string serviceJson = File.ReadAllText(servicePath);

        // The type is always read from the file as stored on disk, before any
        // deployment override is applied.
        JObject serviceJObject = JObject.Parse(serviceJson);
        string linkedServiceType = serviceJObject["properties"]["type"].ToObject<string>();

        // Update with values from deployment config if exists
        Dictionary<string, string> serviceOverrides;
        if (deploymentDict.TryGetValue(linkedServiceName, out serviceOverrides))
        {
            ApplyDeploymentOverrides(serviceJObject, serviceOverrides, linkedServiceName);
            serviceJson = serviceJObject.ToString();
        }

        // Search for Key Vault references in the linked service and replace with their Key Vault equivalents if found
        if (adfFileHelper != null)
        {
            serviceJson = adfFileHelper.ResolveKeyVault(serviceJson).Result;
        }

        Models.LinkedService storageService;
        switch (linkedServiceType)
        {
            case "AzureSqlDatabase":
            case "OnPremisesSqlServer":
                storageService = JsonConvert.DeserializeObject<Models.AzureSqlDatabaseLinkedService>(serviceJson);
                break;

            case "AzureStorage":
                storageService = JsonConvert.DeserializeObject<Models.StorageService>(serviceJson);
                break;

            default:
                throw new Exception($"Mapper for linked service type '{linkedServiceType}' not found.");
        }

        dummyServices.Add(storageService);
    }

    // parse the hd insight service json source
    var computeServicePath = Path.Combine(adfFilesPath, dummyActivity.LinkedServiceName + ".json");
    var computeServiceJson = File.ReadAllText(computeServicePath);
    var computeService = JsonConvert.DeserializeObject<Models.ComputeService>(computeServiceJson);
    dummyServices.Add(computeService);

    // init the services
    foreach (var dummyService in dummyServices)
    {
        LinkedService linkedService = null;

        // The three checks are independent and run in this order; if a model
        // satisfies more than one, the last match decides the result.

        // init if it is a storage service
        var storageModel = dummyService as Models.StorageService;
        if (storageModel != null)
        {
            var service = new AzureStorageLinkedService
            {
                ConnectionString = storageModel.Properties.TypeProperties.ConnectionString
            };
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        // init if it is a AzureSqlDatabase service
        var sqlModel = dummyService as Models.AzureSqlDatabaseLinkedService;
        if (sqlModel != null)
        {
            var service = new AzureSqlDatabaseLinkedService()
            {
                ConnectionString = sqlModel.Properties.TypeProperties.ConnectionString
            };
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        // init if it is a hd insight service
        if (dummyService is Models.ComputeService)
        {
            var service = new HDInsightLinkedService();
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        context.LinkedServices.Add(linkedService);
    }

    return context;
}

/// <summary>
/// Replaces, for each override, the token at the override's JSON path in <paramref name="target"/> with the override's value.
/// </summary>
private static void ApplyDeploymentOverrides(JObject target, Dictionary<string, string> overrides, string artifactName)
{
    foreach (KeyValuePair<string, string> pair in overrides)
    {
        JToken token = target.SelectToken(pair.Key);
        if (token == null)
        {
            // SelectToken returns null for a path that matches nothing; name the path
            // and the artifact instead of failing with a bare NullReferenceException.
            throw new InvalidOperationException($"Deployment config path '{pair.Key}' was not found in '{artifactName}'.");
        }

        token.Replace(pair.Value);
    }
}
/// <summary>
/// Writes one table of <paramref name="dsOutput"/> per activity output, either as a quoted
/// CSV blob or through a data adapter into an Azure SQL table. The table written for an
/// output is the one whose name equals the output's dataset name.
/// </summary>
/// <remarks>
/// The linked service returned by <c>LinkedServiceHelper.GetOutputLinkedService</c> decides
/// the target kind for every output: "AzureStorage" or "AzureSqlDatabase".
/// </remarks>
/// <param name="dnActivity">Activity whose outputs are written.</param>
/// <param name="linkedServices">Linked services available to the activity.</param>
/// <param name="datasets">Datasets available to the activity.</param>
/// <param name="dsOutput">Data set holding one table per output dataset name.</param>
/// <exception cref="NotImplementedException">The output linked service is of any other type.</exception>
/// <exception cref="InvalidOperationException">
/// An output dataset's type properties do not match the linked service type, or
/// <paramref name="dsOutput"/> has no table for an output.
/// </exception>
public static void WriteOutputDataset(Activity dnActivity, IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, System.Data.DataSet dsOutput)
{
    // SQL or Azure Blob CSV only
    var outLS = LinkedServiceHelper.GetOutputLinkedService(dnActivity, linkedServices, datasets);

    // Figure out which type
    switch (outLS.Properties.Type)
    {
        case "AzureStorage":
            CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(((AzureStorageLinkedService)outLS.Properties.TypeProperties).ConnectionString);
            CloudBlobClient outputClient = outputStorageAccount.CreateCloudBlobClient();

            foreach (var t in dnActivity.Outputs)
            {
                AzureBlobDataset abdOutput = datasets.Single(d => d.Name == t.Name).Properties.TypeProperties as AzureBlobDataset;
                if (abdOutput == null)
                {
                    throw new InvalidOperationException($"Output dataset '{t.Name}' is not an Azure Blob dataset.");
                }

                System.Data.DataTable dt = dsOutput.Tables[t.Name];
                if (dt == null)
                {
                    throw new InvalidOperationException($"No table named '{t.Name}' was supplied for output.");
                }

                // Split "container/dir/sub" at the FIRST '/' only. Removing the container
                // name with string.Replace would also strip later occurrences of the same
                // text and leave a leading '/' in the blob name.
                string blobFolderPath = abdOutput.FolderPath;
                int firstSlash = blobFolderPath.IndexOf('/');
                string containerName = firstSlash < 0 ? blobFolderPath : blobFolderPath.Substring(0, firstSlash);
                string sFolder = firstSlash < 0 ? string.Empty : blobFolderPath.Substring(firstSlash + 1).Trim('/');

                CloudBlobContainer container = outputClient.GetContainerReference(containerName);
                CloudBlockBlob cbbOutputFile = container.GetBlockBlobReference(sFolder.Length > 0 ? sFolder + "/" + abdOutput.FileName : abdOutput.FileName);

                using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
                using (var swOutput = new System.IO.StreamWriter(ms))
                {
                    int columnCount = dt.Columns.Count;
                    foreach (System.Data.DataRow r in dt.Rows)
                    {
                        for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
                        {
                            if (columnIndex > 0)
                            {
                                swOutput.Write(",");
                            }

                            // Every field is quoted; embedded quotes are doubled so the
                            // line stays parseable as CSV.
                            swOutput.Write("\"");
                            swOutput.Write(Convert.ToString(r[columnIndex]).Replace("\"", "\"\""));
                            swOutput.Write("\"");
                        }

                        // A table without columns produces no output lines at all.
                        if (columnCount > 0)
                        {
                            swOutput.WriteLine();
                        }
                    }

                    // Push buffered text into the stream and rewind before uploading.
                    swOutput.Flush();
                    ms.Position = 0;
                    cbbOutputFile.UploadFromStream(ms, null, null, null);
                }
            }

            break;

        case "AzureSqlDatabase":
            string sqlConnectionString = ((AzureSqlDatabaseLinkedService)outLS.Properties.TypeProperties).ConnectionString;

            foreach (var t in dnActivity.Outputs)
            {
                AzureSqlTableDataset astOutput = datasets.Single(d => d.Name == t.Name).Properties.TypeProperties as AzureSqlTableDataset;
                if (astOutput == null)
                {
                    throw new InvalidOperationException($"Output dataset '{t.Name}' is not an Azure SQL table dataset.");
                }

                // SqlCommand rejects CommandType.TableDirect, and DataAdapter.Update needs
                // insert/update/delete commands. A plain SELECT on the target table lets
                // SqlCommandBuilder derive those commands from the table's schema.
                // NOTE(review): generated UPDATE/DELETE commands require key columns on
                // the target table; inserting new rows does not - confirm for your tables.
                using (var scOutput = new System.Data.SqlClient.SqlConnection(sqlConnectionString))
                using (var commOutput = new System.Data.SqlClient.SqlCommand(string.Format("SELECT * FROM [{0}]", astOutput.TableName), scOutput))
                using (var sdaOutput = new System.Data.SqlClient.SqlDataAdapter(commOutput))
                using (var commandBuilder = new System.Data.SqlClient.SqlCommandBuilder(sdaOutput))
                {
                    sdaOutput.Update(dsOutput.Tables[t.Name]);
                }
            }

            break;

        default:
            throw new NotImplementedException();
    }
}