/// <summary>
/// Initializes the activity. When a debugger is attached, the activity context is built
/// from the CustomActivity attribute found on the public <c>RunActivity</c> method of the
/// concrete type: the pipeline file named by the attribute is deserialized through
/// <c>Runner.DeserializeActivity</c> and the resulting linked services, datasets, activity
/// and logger are stored on this instance. Without a debugger nothing is loaded here.
/// </summary>
/// <exception cref="Exception">
/// Thrown while debugging when the attribute (or its <c>PipelineLocation</c> or
/// <c>ActivityName</c> value) is missing.
/// </exception>
public CustomActivityBase()
{
    if (!Debugger.IsAttached)
    {
        return;
    }

    // GetMethod returns null when the type has no public RunActivity method; the
    // null-conditional chain below then falls through to the descriptive exception
    // instead of failing inside FirstOrDefault.
    var runActivityMethod = this.GetType().GetMethod("RunActivity");
    var customActivityAttribute = runActivityMethod?.CustomAttributes.FirstOrDefault(x => x.AttributeType.Name == "CustomActivityAttribute");

    // Reads one named argument of the attribute; yields null when the attribute or the
    // argument is absent (FirstOrDefault returns a default struct whose value is null).
    Func<string, string> readNamedArgument = memberName =>
        customActivityAttribute?.NamedArguments?.FirstOrDefault(x => x.MemberName == memberName).TypedValue.Value?.ToString();

    string activityName = readNamedArgument("ActivityName");
    string pipelineLocation = readNamedArgument("PipelineLocation");
    string deployConfig = readNamedArgument("DeployConfig");

    // Both values are required: the pipeline location is split into directory and file
    // name below and the activity name selects the activity inside that pipeline.
    // (The previous '||' let a half-configured attribute through and crashed later.)
    if (string.IsNullOrEmpty(activityName) || string.IsNullOrEmpty(pipelineLocation))
    {
        throw new Exception($"The CustomActivity attribute needs to have the following properties populated: {nameof(CustomActivityAttribute.PipelineLocation)} and {nameof(CustomActivityAttribute.ActivityName)}");
    }

    // The pipeline location is resolved relative to two directories above the current directory.
    string dataFactoryProjLocation = Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), "..\\..", Path.GetDirectoryName(pipelineLocation)));

    DotNetActivityContext context = Runner.DeserializeActivity(Path.GetFileName(pipelineLocation), activityName, deployConfig, dataFactoryProjLocation);

    LinkedServices = context.LinkedServices;
    Datasets = context.Datasets;
    Activity = context.Activity;
    Logger = context.Logger;
    typeProperties = Activity.TypeProperties as DotNetActivity;
}
/// <summary>
/// Reads the "modelid", "accountKey" and "baseuri" extended properties into the instance
/// fields, logs them, and resolves the single input and single output dataset of the activity.
/// Any exception raised while resolving the datasets is logged and not rethrown.
/// </summary>
/// <param name="linkedServices">Linked services passed to the activity (not used here).</param>
/// <param name="datasets">Datasets from which the activity input and output are resolved by name.</param>
/// <param name="activity">Activity definition; its type properties must be a <c>DotNetActivity</c>.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <returns>An empty dictionary.</returns>
public IDictionary<string, string> Execute(IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, Activity activity, IActivityLogger logger)
{
    logger.Write("Start activity");

    DotNetActivity dotNetActivityPipeline = (DotNetActivity)activity.TypeProperties;
    modelid = dotNetActivityPipeline.ExtendedProperties["modelid"];
    accountKey = dotNetActivityPipeline.ExtendedProperties["accountKey"];
    baseUri = dotNetActivityPipeline.ExtendedProperties["baseuri"];

    // NOTE(review): this writes the account key to the activity log in clear text - consider redacting it.
    logger.Write("modelId: {0}; AccountKey:{1}; BaseUri: {2}", modelid, accountKey, baseUri);
    logger.Write("Start with Export");

    try
    {
        var inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.Single().Name);

        // The result is not needed, but the lookup still verifies that exactly one
        // output is declared and that a dataset with that name exists.
        datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

        logger.Write(inputDataset.Name);

        //RecommendationsApiWrapper api = new RecommendationsApiWrapper(accountKey, baseUri);
    }
    catch (Exception ex)
    {
        // InnerException is null for most exceptions; dereferencing it unconditionally
        // threw a NullReferenceException from the handler and hid the real error.
        string innerDetails = ex.InnerException != null ? ex.InnerException.ToString() : string.Empty;
        logger.Write("Error occurred: {0} - {1}", ex.Message, innerDetails);
    }

    logger.Write("end");
    return new Dictionary<string, string>();
}
/// <summary>
/// Stores the supplied Data Factory objects on this instance and then delegates to
/// <c>RunActivity</c>.
/// </summary>
/// <param name="linkedServices">Linked services to expose through <c>LinkedServices</c>.</param>
/// <param name="datasets">Datasets to expose through <c>Datasets</c>.</param>
/// <param name="activity">Activity definition to expose through <c>Activity</c>.</param>
/// <param name="logger">Logger to expose through <c>Logger</c>.</param>
/// <returns>Whatever <c>RunActivity</c> returns.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    LinkedServices = linkedServices;
    Datasets = datasets;
    Activity = activity;
    Logger = logger;

    // 'as' leaves typeProperties null when the activity is not a DotNetActivity.
    typeProperties = Activity.TypeProperties as DotNetActivity;

    IDictionary<string, string> activityResult = RunActivity();
    return activityResult;
}
/// <summary>
/// Returns all extended properties of the current activity.
/// </summary>
/// <returns>
/// The activity's own property dictionary when it already is a
/// <see cref="Dictionary{TKey, TValue}"/>; otherwise a copy of its entries.
/// </returns>
/// <exception cref="Exception">Thrown when the activity defines no extended properties.</exception>
public Dictionary<string, string> GetAllExtendedProperties()
{
    DotNetActivity dotNetActivity = (DotNetActivity)Activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    // A missing property bag is reported the same way as an empty one.
    if (extendedProperties == null || !extendedProperties.Any())
    {
        throw new Exception($"No properties found in the extended properties section of the custom activity '{Activity.Name}'");
    }

    // A bare 'as' cast would hand callers null for any other IDictionary implementation;
    // fall back to copying the entries in that case.
    return extendedProperties as Dictionary<string, string>
        ?? new Dictionary<string, string>(extendedProperties);
}
/// <summary>
/// Checks the arguments and the extended properties of the activity before processing.
/// </summary>
/// <param name="linkedServices">Linked services; must not be null. Each name is logged.</param>
/// <param name="datasets">Datasets; must not be null.</param>
/// <param name="activity">Activity; must not be null, needs at least one input and exactly one output.</param>
/// <param name="logger">Logger; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
/// <exception cref="ArgumentException">
/// Thrown when the input/output counts are wrong, when neither the advanced processing
/// script path nor the tabular database name is configured, or when the connection
/// string property is missing.
/// </exception>
private void ValidateParameters(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    if (linkedServices == null)
    {
        throw new ArgumentNullException("linkedServices");
    }

    if (datasets == null)
    {
        throw new ArgumentNullException("datasets");
    }

    if (activity == null)
    {
        throw new ArgumentNullException("activity");
    }

    if (logger == null)
    {
        throw new ArgumentNullException("logger");
    }

    // Dataset checks: one or more inputs, exactly one output.
    bool hasAnyInput = activity.Inputs.Any();
    if (!hasAnyInput)
    {
        throw new ArgumentException("At least one input dataset is required");
    }

    if (activity.Outputs.Count != 1)
    {
        throw new ArgumentException("Only one output datasets is required, as a dummy");
    }

    foreach (LinkedService service in linkedServices)
    {
        logger.Write("Detected linkedService.Name {0}", service.Name);
    }

    IDictionary<string, string> extendedProperties = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    // The tabular database name is only mandatory when no advanced script path is given.
    bool hasScriptPath = extendedProperties.ContainsKey(ADV_AS_PROCESS_SCRIPT_PATH_PARAMETER_NAME);
    if (!hasScriptPath && !extendedProperties.ContainsKey(TABULAR_DATABASE_NAME_PARAMETER_NAME))
    {
        throw new ArgumentException(TABULAR_DATABASE_NAME_PARAMETER_NAME);
    }

    // The connection string is always mandatory.
    if (!extendedProperties.ContainsKey(AZUREAS_CONNECTION_STRING_PARAMETER_NAME))
    {
        throw new ArgumentException(AZUREAS_CONNECTION_STRING_PARAMETER_NAME);
    }

    logger.Write("Parameters validated");
}
/// <summary>
/// Builds the processing context from the tabular database name and the connection
/// string stored in the activity's extended properties.
/// </summary>
/// <param name="linkedServices">Not used by this overload.</param>
/// <param name="activity">Activity whose type properties are a <c>DotNetActivity</c>.</param>
/// <param name="logger">Not used by this overload.</param>
/// <returns>A context carrying the two values read from the extended properties.</returns>
private ProcessAzureASContext CreateContext(
    IEnumerable<LinkedService> linkedServices,
    Activity activity,
    IActivityLogger logger)
{
    IDictionary<string, string> settings = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    // The indexer throws KeyNotFoundException when a property is absent.
    string databaseName = settings[TABULAR_DATABASE_NAME_PARAMETER_NAME];
    string connectionString = settings[AZUREAS_CONNECTION_STRING_PARAMETER_NAME];

    ProcessAzureASContext context = new ProcessAzureASContext();
    context.TabularDatabaseName = databaseName;
    context.AzureASConnectionString = connectionString;
    return context;
}
/// <summary>
/// Logs the datasets, inputs and outputs of the activity and passes the comma-separated
/// entries of the "InputToDelete" extended property to <c>DeleteBlobFileFolder</c>.
/// </summary>
/// <param name="linkedServices">Linked services passed to the activity (not used here).</param>
/// <param name="datasets">Datasets whose names are logged.</param>
/// <param name="activity">Activity definition; its type properties must be a <c>DotNetActivity</c>.</param>
/// <param name="logger">Logger that receives the progress messages.</param>
/// <returns>An empty dictionary.</returns>
/// <exception cref="Exception">
/// Thrown when any step fails; carries the original message and the original exception
/// as <see cref="Exception.InnerException"/>.
/// </exception>
public IDictionary<string, string> Execute(IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, Activity activity, IActivityLogger logger)
{
    try
    {
        logger.Write("Custom Activity Started.");

        DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
        string inputToDelete = dotNetActivity.ExtendedProperties["InputToDelete"];
        logger.Write("\nInput to delete is " + inputToDelete);

        logger.Write("\nAll Dataset(s) Below ");
        foreach (Dataset ds in datasets)
        {
            logger.Write("\nDataset: " + ds.Name);
        }

        foreach (string name in activity.Inputs.Select(i => i.Name))
        {
            logger.Write("\nInput Dataset: " + name);
        }

        foreach (string name in activity.Outputs.Select(i => i.Name))
        {
            logger.Write("\nOutput Dataset: " + name);
        }

        List<string> dataSetsToDelete = inputToDelete.Split(',').ToList();
        DeleteBlobFileFolder(dataSetsToDelete);

        logger.Write("Custom Activity Ended Successfully.");
    }
    catch (Exception e)
    {
        logger.Write("Custom Activity Failed with error.");
        logger.Write("Caught exception: ");
        logger.Write(e.Message);

        // Keep the thrown type and message callers already see, but attach the original
        // exception so its type and stack trace are no longer lost.
        throw new Exception(e.Message, e);
    }

    // The dictionary can be used to chain custom activities together in the future.
    // This feature is not implemented yet, so just return an empty dictionary.
    return new Dictionary<string, string>();
}
/// <summary>
/// Acquires an Azure AD token with the client credentials stored in the activity's
/// extended properties and formats the access token into the connection string.
/// </summary>
/// <param name="dotNetActivity">Activity whose extended properties hold authority, resource, client id and client secret.</param>
/// <param name="aasConnectionString">Composite format string; the access token is supplied as its only format argument.</param>
/// <returns>The formatted connection string.</returns>
private static string GetAzureADToken(DotNetActivity dotNetActivity, string aasConnectionString)
{
    var settings = dotNetActivity.ExtendedProperties;
    string authority = settings[AZUREAD_AUTHORITY_PARAMETER_NAME];
    string resource = settings[AZUREAD_RESOURCE_PARAMETER_NAME];
    string clientId = settings[AZUREAD_CLIENTID_PARAMETER_NAME];
    string clientSecret = settings[AZUREAD_CLIENTSECRET_PARAMETER_NAME];

    var authenticationContext = new AuthenticationContext(authority);
    var credential = new ClientCredential(clientId, clientSecret);

    // The method is synchronous, so block until the token request has completed.
    var pendingToken = authenticationContext.AcquireTokenAsync(resource, credential);
    pendingToken.Wait();
    AuthenticationResult acquired = pendingToken.Result;

    return string.Format(CultureInfo.InvariantCulture, aasConnectionString, acquired.AccessToken);
}
/// <summary>
/// Builds the processing context from the activity's extended properties and from the
/// Azure Storage linked service behind the activity's single output dataset.
/// </summary>
/// <param name="linkedServices">Linked services; the one referenced by the output dataset must be an Azure Storage linked service.</param>
/// <param name="datasets">Datasets; must contain the dataset named by the activity's single output.</param>
/// <param name="activity">Activity whose type properties are a <c>DotNetActivity</c>.</param>
/// <param name="logger">Not used by this method.</param>
/// <returns>
/// A context with the tabular database name (empty when not configured), the connection
/// string (with an Azure AD token formatted in when an authority is configured), the
/// advanced processing script path (empty when not configured) and the blob storage
/// connection string.
/// </returns>
/// <exception cref="InvalidOperationException">Thrown when the output dataset's linked service is not an Azure Storage linked service.</exception>
private ProcessAzureASContext CreateContext(IEnumerable<LinkedService> linkedServices, IEnumerable<Dataset> datasets, Activity activity, IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    // ValidateParameters accepts a configuration without a tabular database name when an
    // advanced processing script path is supplied, so the name must be read tolerantly
    // (the indexer threw KeyNotFoundException for that valid configuration).
    string tabularDatabaseName;
    if (!extendedProperties.TryGetValue(TABULAR_DATABASE_NAME_PARAMETER_NAME, out tabularDatabaseName))
    {
        tabularDatabaseName = string.Empty;
    }

    // The connection string is mandatory; the indexer throws when it is absent.
    string aasConnectionString = extendedProperties[AZUREAS_CONNECTION_STRING_PARAMETER_NAME];

    string advASProcessingScriptPath;
    if (!extendedProperties.TryGetValue(ADV_AS_PROCESS_SCRIPT_PATH_PARAMETER_NAME, out advASProcessingScriptPath))
    {
        advASProcessingScriptPath = string.Empty;
    }

    if (extendedProperties.ContainsKey(AZUREAD_AUTHORITY_PARAMETER_NAME))
    {
        aasConnectionString = GetAzureADToken(dotNetActivity, aasConnectionString);
    }

    // Get Azure Storage Linked Service Connection String from the dummy output dataset,
    // AS processing does not produce output dataset, so we use this to access the TMSL script for AS processing
    Dataset outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

    // get the Azure Storage linked service from linkedServices object
    LinkedService outputService = linkedServices.First(
        linkedService => linkedService.Name == outputDataset.Properties.LinkedServiceName);
    AzureStorageLinkedService outputLinkedService = outputService.Properties.TypeProperties as AzureStorageLinkedService;
    if (outputLinkedService == null)
    {
        // Fail with a clear message instead of a NullReferenceException on the next line.
        throw new InvalidOperationException(
            "The linked service '" + outputService.Name + "' of the output dataset is not an Azure Storage linked service.");
    }

    // get the connection string in the linked service
    string blobconnectionString = outputLinkedService.ConnectionString;

    return new ProcessAzureASContext
    {
        TabularDatabaseName = tabularDatabaseName,
        AzureASConnectionString = aasConnectionString,
        AdvancedASProcessingScriptPath = advASProcessingScriptPath,
        BlobStorageConnectionString = blobconnectionString
    };
}
//public IDictionary<string, string> Execute(
//    IEnumerable<ResolvedTable> inputTables,
//    IEnumerable<ResolvedTable> outputTables,
//    IDictionary<string, string> inputs,
//    IActivityLogger activityLogger)

/// <summary>
/// Copies the storage settings from the activity's extended properties into the instance
/// fields and calls <c>GatherDataForOneHour</c> with the "sliceStart" and "urlFormat" values.
/// </summary>
/// <param name="linkedServices">Linked services passed to the activity (not used here).</param>
/// <param name="datasets">Datasets passed to the activity (not used here).</param>
/// <param name="activity">Activity definition; its type properties must be a <c>DotNetActivity</c>.</param>
/// <param name="logger">Logger stored in <c>_logger</c>.</param>
/// <returns>An empty dictionary.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // All settings come from the extended properties of the activity definition.
    var settings = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    _dataStorageAccountName = settings["dataStorageAccountName"];
    _dataStorageAccountKey = settings["dataStorageAccountKey"];
    _dataStorageContainer = settings["dataStorageContainer"];

    string sliceStart = settings["sliceStart"];
    string urlTemplate = settings["urlFormat"];

    _logger = logger;

    GatherDataForOneHour(sliceStart, urlTemplate);

    _logger.Write("Exit");

    IDictionary<string, string> nothingToReport = new Dictionary<string, string>();
    return nothingToReport;
}
/// <summary>
/// Extracts everything the geocoding step needs from the Data Factory objects into a
/// plain <c>GeoCodeContext</c>.
/// </summary>
/// <param name="linkedServices">Linked services; must contain the one referenced by the input dataset.</param>
/// <param name="datasets">Datasets; must contain the one named by the activity's single input.</param>
/// <param name="activity">Activity definition supplying the "OutputFolder" and "MapsAPIKey" extended properties.</param>
/// <param name="logger">Not used by this method.</param>
/// <returns>The populated context.</returns>
protected override GeoCodeContext PreExecute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // Process ADF artifacts up front as these objects are not serializable across app domain boundaries.
    Dataset inputDataset = datasets.First(candidate => candidate.Name == activity.Inputs.Single().Name);
    AzureBlobDataset blob = (AzureBlobDataset)inputDataset.Properties.TypeProperties;

    LinkedService storageService = linkedServices.First(candidate => candidate.Name == inputDataset.Properties.LinkedServiceName);
    AzureStorageLinkedService storage = (AzureStorageLinkedService)storageService.Properties.TypeProperties;

    // Extended properties carry the remaining settings.
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;

    GeoCodeContext context = new GeoCodeContext();
    context.ConnectionString = storage.ConnectionString;
    context.FolderPath = blob.FolderPath;
    context.FileName = blob.FileName;
    context.OutputFolder = dotNetActivity.ExtendedProperties["OutputFolder"];
    context.MapsAPIKey = dotNetActivity.ExtendedProperties["MapsAPIKey"];
    return context;
}
/// <summary>
/// Copies the IoT hub and storage settings from the activity's extended properties into
/// the instance fields, sets <c>expiry</c> from <c>EpochGenerator.GetEpochTime(1)</c>
/// and calls <c>GatherDataFromIotHub</c>.
/// </summary>
/// <param name="linkedServices">Linked services passed to the activity (not used here).</param>
/// <param name="datasets">Datasets passed to the activity (not used here).</param>
/// <param name="activity">Activity definition; its type properties must be a <c>DotNetActivity</c>.</param>
/// <param name="logger">Logger stored in <c>_logger</c>.</param>
/// <returns>An empty dictionary.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // All settings come from the extended properties of the activity definition.
    var settings = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    uri = settings["uri"];
    expiry = EpochGenerator.GetEpochTime(1).ToString();
    key = settings["key"];
    policyName = settings["policyName"];

    dataStorageAccountName = settings["dataStorageAccountName"];
    dataStorageContainer = settings["dataStorageContainer"];
    dataStorageAccountKey = settings["dataStorageAccountKey"];

    _logger = logger;

    GatherDataFromIotHub();

    _logger.Write("Exit");

    IDictionary<string, string> nothingToReport = new Dictionary<string, string>();
    return nothingToReport;
}
//public IDictionary<string, string> Execute(
//    IEnumerable<ResolvedTable> inputTables,
//    IEnumerable<ResolvedTable> outputTables,
//    IDictionary<string, string> inputs,
//    IActivityLogger activityLogger)

/// <summary>
/// Test activity: reads its settings from the extended properties, parses "sliceEnd" as a
/// date and writes a single log message. No data is gathered.
/// </summary>
/// <param name="linkedServices">Linked services passed to the activity (not used here).</param>
/// <param name="datasets">Datasets passed to the activity (not used here).</param>
/// <param name="activity">Activity definition; its type properties must be a <c>DotNetActivity</c>.</param>
/// <param name="logger">Logger stored in <c>_logger</c>.</param>
/// <returns>An empty dictionary.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // All settings come from the extended properties of the activity definition.
    var settings = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    _dataStorageAccountName = settings["dataStorageAccountName"];
    _dataStorageAccountKey = settings["dataStorageAccountKey"];
    _dataStorageContainer = settings["dataStorageContainer"];

    // These values are not used further, but reading and parsing them still fails
    // when a property is missing or "sliceEnd" is not a date.
    DateTime sliceEndTime = DateTime.Parse(settings["sliceEnd"]);
    string sliceStart = settings["sliceStart"];
    string urlTemplate = settings["urlFormat"];

    _logger = logger;
    _logger.Write("This is just a simple CustomActivity which does nothing at the moment and is only here for testing!");

    //GatherDataForOneHour(sliceStartTime, urlFormat);

    IDictionary<string, string> nothingToReport = new Dictionary<string, string>();
    return nothingToReport;
}
/// <summary>
/// Writes the extended properties, linked service names, dataset names and the names of
/// the activity's inputs and outputs to the log.
/// </summary>
/// <param name="linkedServices">Linked services whose names are logged.</param>
/// <param name="datasets">Datasets whose names are logged.</param>
/// <param name="activity">Activity whose extended properties, inputs and outputs are logged.</param>
/// <param name="logger">Logger that receives the messages.</param>
private void LogDataFactoryElements(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    logger.Write("\n******** Data Factory info ********");

    IDictionary<string, string> properties = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    logger.Write("\nLogging extended properties if any...");
    foreach (var property in properties)
    {
        logger.Write("\n\t<key:{0}> <value:{1}>", property.Key, property.Value);
    }

    logger.Write("\nAll Linked Services(s) Below ");
    foreach (var service in linkedServices)
    {
        logger.Write("\n\tLinked Service: " + service.Name);
    }

    logger.Write("\nAll Dataset(s) Below ");
    foreach (var dataset in datasets)
    {
        logger.Write("\n\tDataset: " + dataset.Name);
    }

    foreach (var input in activity.Inputs)
    {
        logger.Write("\nInput Dataset: " + input.Name);
    }

    foreach (var output in activity.Outputs)
    {
        logger.Write("\nOutput Dataset: " + output.Name);
    }
}
/// <summary>
/// Transforms the values of a column in an Azure table. The column may be a normal column or the RowKey column,
/// but cannot be the PartitionKey column.
/// Extended Properties
///     columnName - Name of the column to be transformed
///     columnType - Data type of the column. Only supported types right now are: int32, bool, and string
///     ifColumnValueMatches - The transformation is applied only if the column value matches the specified value.
///     replaceColumnValueWith - Value that replaces a matched column value.
///     ifRowKeyContains - The transformation is applied only if the row key contains the specified value.
///     replaceRowKeySubStrWith - Value that replaces the matched part of the row key to generate a new row key.
///     rowKeyPrefixes - Optional, comma separated row key prefixes that select the subset of rows to work on.
/// Specify columnName, columnType, ifColumnValueMatches, replaceColumnValueWith or ifRowKeyContains,
/// replaceRowKeySubStrWith or both, as they work on different column types.
/// Extended Properties Example
///     "columnName": "IdentityProviderType",
///     "columnType": "string",
///     "ifColumnValueMatches": "Beihai",
///     "replaceColumnValueWith": "AADS2S",
///     "ifRowKeyContains": "Beihai",
///     "replaceRowKeySubStrWith": "AADS2S"
/// Activity Operation
///     The activity reads the input table segment by segment, keeps the rows matching rowKeyPrefixes,
///     applies the column transformation where the column value matches,
///     applies the row key transformation where the row key matches,
///     queues a replace operation when only the column changed and
///     an insert plus delete operation when the row key changed.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Microsoft.Azure.Management.DataFactories.Models.Activity activity,
    IActivityLogger logger)
{
    IDictionary<string, string> extendedProperties = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (var property in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", property.Key, property.Value);
    }

    // Optional row filter.
    string[] rowKeyPrefixes = extendedProperties.ContainsKey("rowKeyPrefixes")
        ? extendedProperties["rowKeyPrefixes"].Split(',')
        : null;

    // Column transformation settings; the presence of "columnName" switches it on and
    // the remaining three properties are then read with the indexer.
    bool hasColumnUpdate = extendedProperties.ContainsKey("columnName");
    string columnName = string.Empty;
    string columnType = string.Empty;
    string ifColumnValueMatches = string.Empty;
    string replaceColumnValueWith = string.Empty;
    if (hasColumnUpdate)
    {
        columnName = extendedProperties["columnName"];
        columnType = extendedProperties["columnType"];
        ifColumnValueMatches = extendedProperties["ifColumnValueMatches"];
        replaceColumnValueWith = extendedProperties["replaceColumnValueWith"];
    }

    // Row key transformation settings; the presence of "ifRowKeyContains" switches it on.
    bool hasRowKeyUpdate = extendedProperties.ContainsKey("ifRowKeyContains");
    string ifRowKeyContains = string.Empty;
    string replaceRowKeySubStrWith = string.Empty;
    if (hasRowKeyUpdate)
    {
        ifRowKeyContains = extendedProperties["ifRowKeyContains"];
        replaceRowKeySubStrWith = extendedProperties["replaceRowKeySubStrWith"];
    }

    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices
        .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;

    // Create the storage client for the input table from the linked service's connection string.
    CloudTable inputTable = CloudStorageAccount
        .Parse(inputLinkedService.ConnectionString)
        .CreateCloudTableClient()
        .GetTableReference(sourceTable.TableName);

    bool acceptEveryRow = rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0;
    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    List<Task> pendingBatches = new List<Task>();
    TableContinuationToken continuation = null;

    do
    {
        var segment = inputTable.ExecuteQuerySegmented(new TableQuery(), continuation);
        continuation = segment.ContinuationToken;

        // One batch is built per partition key.
        var rowsByPartition = segment.Results
            .Where(row => acceptEveryRow || this.IsMatch(row.RowKey, rowKeyPrefixes))
            .GroupBy(row => row.PartitionKey);

        foreach (var partition in rowsByPartition)
        {
            TableBatchOperation batch = new TableBatchOperation();

            foreach (DynamicTableEntity entity in partition)
            {
                // Snapshot the row key and properties before any transformation touches
                // the entity; the snapshot identifies the row to delete after a row key change.
                string originalRowKey = entity.RowKey;
                IDictionary<string, EntityProperty> originalProperties = new Dictionary<string, EntityProperty>(entity.Properties);

                // The column transformation runs first, then the row key check.
                bool columnChanged = hasColumnUpdate
                    && this.ReplaceIfMatch(entity, columnName, columnType, ifColumnValueMatches, replaceColumnValueWith);

                bool rowKeyChanged = hasRowKeyUpdate && entity.RowKey.Contains(ifRowKeyContains);
                if (rowKeyChanged)
                {
                    entity.RowKey = entity.RowKey.Replace(ifRowKeyContains, replaceRowKeySubStrWith);
                }

                if (!columnChanged && !rowKeyChanged)
                {
                    continue;
                }

                if (rowKeyChanged)
                {
                    // Insert the entity under its new row key and delete the old row.
                    batch.Insert(entity);
                    batch.Delete(new DynamicTableEntity(entity.PartitionKey, originalRowKey, "*", originalProperties));
                }
                else
                {
                    batch.Replace(entity);
                }

                actualAffectedRecords++;
                logger.Write("<partition key:{0}>, <row key:{1}> added to batch", entity.PartitionKey, entity.RowKey);
            }

            if (batch.Count > 0)
            {
                pendingBatches.Add(inputTable.ExecuteBatchInChunkAsync(batch));
            }

            logger.Write("Updated partition: {0}", partition.Key);
        }

        totalProcessedRecords += segment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (continuation != null);

    // Block until every queued batch has finished.
    Task.WaitAll(pendingBatches.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
/// <summary>
/// Transforms Azure table partition key
/// Extended Properties
///     ifPartitionKeyContains - The transformation is applied only if the partition key contains the specified value. Mandatory.
///     replacePartitionKeySubStrWith - Value that replaces the matched part of the partition key to generate a new partition key. Mandatory.
///     rowKeyPrefixes - Optional, comma separated row key prefixes that select the subset of rows to work on.
/// Extended Properties Example
///     "ifPartitionKeyContains": "Beihai",
///     "replacePartitionKeySubStrWith": "AADS2S"
/// Activity Operation
///     The activity reads the input table segment by segment, keeps the rows matching rowKeyPrefixes and,
///     for every row whose partition key matches, queues an insert-or-replace of a copy under the new
///     partition key and a delete of the existing row.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution</returns>
/// <exception cref="ArgumentException">Thrown when one of the two mandatory extended properties is missing.</exception>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Microsoft.Azure.Management.DataFactories.Models.Activity activity,
    IActivityLogger logger)
{
    IDictionary<string, string> extendedProperties = ((DotNetActivity)activity.TypeProperties).ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (var property in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", property.Key, property.Value);
    }

    // Optional row filter.
    string[] rowKeyPrefixes = extendedProperties.ContainsKey("rowKeyPrefixes")
        ? extendedProperties["rowKeyPrefixes"].Split(',')
        : null;

    // Both partition key settings are mandatory.
    if (!extendedProperties.ContainsKey("ifPartitionKeyContains"))
    {
        throw new ArgumentException("Partition key match criteria is required", "ifPartitionKeyContains");
    }

    if (!extendedProperties.ContainsKey("replacePartitionKeySubStrWith"))
    {
        throw new ArgumentException("Partition key substring replacement value is required", "replacePartitionKeySubStrWith");
    }

    string ifPartitionKeyContains = extendedProperties["ifPartitionKeyContains"];
    string replacePartitionKeySubStrWith = extendedProperties["replacePartitionKeySubStrWith"];

    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices
        .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;

    // Create the storage client for the input table from the linked service's connection string.
    CloudTable inputTable = CloudStorageAccount
        .Parse(inputLinkedService.ConnectionString)
        .CreateCloudTableClient()
        .GetTableReference(sourceTable.TableName);

    bool acceptEveryRow = rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0;
    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    List<Task> pendingBatches = new List<Task>();
    TableContinuationToken continuation = null;

    do
    {
        var segment = inputTable.ExecuteQuerySegmented(new TableQuery(), continuation);
        continuation = segment.ContinuationToken;

        var rowsByPartition = segment.Results
            .Where(row => acceptEveryRow || this.IsMatch(row.RowKey, rowKeyPrefixes))
            .GroupBy(row => row.PartitionKey);

        foreach (var partition in rowsByPartition)
        {
            TableBatchOperation deletions = new TableBatchOperation();
            TableBatchOperation insertions = new TableBatchOperation();

            foreach (DynamicTableEntity existing in partition)
            {
                if (existing.PartitionKey.Contains(ifPartitionKeyContains))
                {
                    // Copy the row under the rewritten partition key, keeping the row key
                    // and every property.
                    string renamedPartitionKey = existing.PartitionKey.Replace(ifPartitionKeyContains, replacePartitionKeySubStrWith);
                    DynamicTableEntity replacement = new DynamicTableEntity(renamedPartitionKey, existing.RowKey);
                    foreach (var property in existing.Properties)
                    {
                        replacement.Properties.Add(property.Key, property.Value);
                    }

                    insertions.InsertOrReplace(replacement);
                    deletions.Delete(existing);
                    actualAffectedRecords++;
                }
            }

            // NOTE(review): the insert and delete batches are started independently, so a
            // failed insert does not prevent the matching delete - confirm this is acceptable.
            if (insertions.Count > 0)
            {
                pendingBatches.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(insertions), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
            }

            if (deletions.Count > 0)
            {
                pendingBatches.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(deletions), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
            }

            logger.Write("Updated partition: {0}", partition.Key);
        }

        totalProcessedRecords += segment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (continuation != null);

    // Block until every queued batch has finished.
    Task.WaitAll(pendingBatches.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
/// <summary>
/// Execute method is the only method of IDotNetActivity interface you must implement.
/// It lists the blobs under the input dataset's folder segment by segment, passes each
/// segment to <c>Calculate</c> and uploads the resulting text to the blob described by
/// the output dataset, using the storage account of the input dataset's linked service.
/// </summary>
/// <param name="linkedServices">Linked services; each name is logged and the one of the input dataset supplies the connection string.</param>
/// <param name="datasets">Datasets; must contain the datasets named by the activity's single input and single output.</param>
/// <param name="activity">Activity definition; must define the "SliceStart" extended property.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>An empty dictionary.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // Extended properties come from the activity JSON definition. The value is not used
    // further, but the read fails when "SliceStart" is not defined.
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    string sliceStart = dotNetActivity.ExtendedProperties["SliceStart"];

    // Resolve the input dataset and its blob type properties.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.Single().Name);
    AzureBlobDataset inputBlobProperties = inputDataset.Properties.TypeProperties as AzureBlobDataset;

    // Log every linked service handed to the activity.
    foreach (LinkedService service in linkedServices)
    {
        logger.Write("linkedService.Name {0}", service.Name);
    }

    // First (rather than Single) is used because the same Azure Storage linked service
    // serves both input and output.
    LinkedService storageService = linkedServices.First(
        linkedService => linkedService.Name == inputDataset.Properties.LinkedServiceName);
    AzureStorageLinkedService storage = storageService.Properties.TypeProperties as AzureStorageLinkedService;
    string connectionString = storage.ConnectionString;

    string folderPath = GetFolderPath(inputDataset);
    string output = string.Empty;

    // Storage client for reading the input blobs.
    CloudBlobClient inputClient = CloudStorageAccount.Parse(connectionString).CreateCloudBlobClient();

    // Calculate receives the token by reference; the loop ends once it is null.
    // NOTE(review): output is overwritten for every segment, so only the result of the
    // last segment is uploaded - confirm that this is intended.
    BlobContinuationToken continuationToken = null;
    do
    {
        BlobResultSegment segment = inputClient.ListBlobsSegmented(
            folderPath,
            true,
            BlobListingDetails.Metadata,
            null,
            continuationToken,
            null,
            null);

        output = Calculate(segment, logger, folderPath, ref continuationToken);
    }
    while (continuationToken != null);

    // Resolve the output dataset by the name found in the activity's output collection.
    Dataset outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);
    AzureBlobDataset outputBlobProperties = outputDataset.Properties.TypeProperties as AzureBlobDataset;

    folderPath = GetFolderPath(outputDataset);
    logger.Write("Writing blob to the folder: {0}", folderPath);

    // The output blob lives in the same storage account as the input.
    CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(connectionString);
    Uri outputBlobUri = new Uri(outputStorageAccount.BlobEndpoint, folderPath + "/" + GetFileName(outputDataset));
    logger.Write("output blob URI: {0}", outputBlobUri.ToString());

    // Create the blob and upload the output text.
    CloudBlockBlob outputBlob = new CloudBlockBlob(outputBlobUri, outputStorageAccount.Credentials);
    logger.Write("Writing {0} to the output blob", output);
    outputBlob.UploadText(output);

    // The dictionary can be used to chain custom activities together in the future.
    // This feature is not implemented yet, so just return an empty dictionary.
    return new Dictionary<string, string>();
}
/// <summary>
/// Method deletes multiple specified tables
/// Extended Properties
///     tablesToDelete - Name of the tables (comma separated) to be deleted.
///     At least one table needs to be specified; blank entries and surrounding whitespace are ignored.
/// Extended Properties Example
///     "tablesToDelete": "Following",
/// Activity Operation
///     The activity iterates through all the tables from the tablesToDelete extended property
///     and deletes each table that exists in the storage account of the input dataset.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    // A plain Split(',') never returns an empty array, so an empty value or a trailing
    // comma used to produce blank table names that fail in GetTableReference. Drop empty
    // entries and trim whitespace so "A, B," resolves to the tables A and B.
    string[] tablesToDelete = null;
    string rawTableList;
    if (extendedProperties.TryGetValue("tablesToDelete", out rawTableList) && rawTableList != null)
    {
        tablesToDelete = rawTableList
            .Split(new[] { ',' }, System.StringSplitOptions.RemoveEmptyEntries)
            .Select(tableName => tableName.Trim())
            .Where(tableName => tableName.Length > 0)
            .ToArray();
    }

    if (tablesToDelete == null || tablesToDelete.Length <= 0)
    {
        logger.Write("No tables to delete");
        return new Dictionary<string, string>();
    }

    // Use the input dataset to get the storage connection string.
    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    // create storage client for input. Pass the connection string.
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient tableClient = storageAccount.CreateCloudTableClient();

    foreach (string tableName in tablesToDelete)
    {
        CloudTable table = tableClient.GetTableReference(tableName);

        // DeleteIfExists reports whether a table was actually removed, which avoids the
        // window between a separate Exists() check and Delete().
        if (table.DeleteIfExists())
        {
            logger.Write("Table {0} deleted.", tableName);
        }
        else
        {
            logger.Write("Table {0} does not exist.", tableName);
        }
    }

    return new Dictionary<string, string>();
}
/// <summary>
/// Updates a column value in Apps Azure table.
/// The column to be transformed is specified using the following extended properties
/// Extended Properties
///     columnName - Name of the column to be added
///     columnType - Data type of the column. Only supported types right now are: int32, bool, and string
///     rowKeyPrefixes - Rowkey prefixes (comma separated) of the rows in which the column update will be applied.
///     This is optional and will identify the subset of rows to do this operation.
///     partitionKeyOwnerValueRule - The updates are specified using the partition key owner and the value for it
///     in the ; separated key-value format. Everything after the first '=' of an entry is the value.
/// Extended Properties Example
///     "columnName": "DisableHandleValidation",
///     "columnType": "bool",
///     "rowKeyPrefixes": "ProfilesObject:"
///     "partitionKeyOwnerValueRule": "Beihai=true;EndToEndTests=true"
/// Activity Operation
///     The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
///     checks if the column is present, updates the column value if the partition key belongs to the app handles
///     associated with the owner specified in partitionKeyOwnerValueRule
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution (always an empty dictionary)</returns>
/// <exception cref="ArgumentException">
/// A required extended property is missing, or a partitionKeyOwnerValueRule entry is not in owner=value form
/// or names the same owner twice.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The first input dataset is not an Azure table dataset, or its linked service is not an Azure Storage linked service.
/// </exception>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    string columnName;
    if (!extendedProperties.TryGetValue("columnName", out columnName))
    {
        throw new ArgumentException("Column name is required", "columnName");
    }

    string columnType;
    if (!extendedProperties.TryGetValue("columnType", out columnType))
    {
        throw new ArgumentException("Column Type information is required", "columnType");
    }

    // Note that partitionKeyOwnerValueRule is required as the rules for updating value comes from it.
    // We do not update column value with default value if the matching rule is not found. The record is ignored.
    // All rules need to be explicitly specified.
    string partitionKeyOwnerValueRule;
    if (!extendedProperties.TryGetValue("partitionKeyOwnerValueRule", out partitionKeyOwnerValueRule))
    {
        throw new ArgumentException("PartitionKeyOwnerValueRule information is required", "partitionKeyOwnerValueRule");
    }

    string[] rowKeyPrefixes = null;
    string rawRowKeyPrefixes;
    if (extendedProperties.TryGetValue("rowKeyPrefixes", out rawRowKeyPrefixes))
    {
        rowKeyPrefixes = rawRowKeyPrefixes.Split(',');
    }

    // Parse "owner=value;owner=value". Splitting on the first '=' only keeps values that contain '=' intact,
    // and an entry without '=' is rejected explicitly instead of failing with IndexOutOfRangeException.
    var partitionKeyOwnerValueRuleDict = new Dictionary<string, string>();
    foreach (string rule in partitionKeyOwnerValueRule.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] ruleParts = rule.Split(new[] { '=' }, 2);
        if (ruleParts.Length != 2)
        {
            throw new ArgumentException(
                string.Format("Rule '{0}' is not in the owner=value format", rule),
                "partitionKeyOwnerValueRule");
        }

        partitionKeyOwnerValueRuleDict.Add(ruleParts[0], ruleParts[1]);
    }

    var matchingOwners = ownerAppHandles
        .Where(item => partitionKeyOwnerValueRuleDict.ContainsKey(item.Key))
        .ToList();
    var appHandles = matchingOwners.SelectMany(item => item.Value).ToList();
    logger.Write("Matching appHandles:{0}", string.Join(",", appHandles));

    // Map every app handle to the value of the first owner that HAS a rule. Looking the owner up among all
    // owners (as before) could pick an owner without a rule and fail with KeyNotFoundException.
    var valueByAppHandle = new Dictionary<string, string>();
    foreach (var owner in matchingOwners)
    {
        string ownerValue = partitionKeyOwnerValueRuleDict[owner.Key];
        foreach (string appHandle in owner.Value)
        {
            if (appHandle != null && !valueByAppHandle.ContainsKey(appHandle))
            {
                valueByAppHandle.Add(appHandle, ownerValue);
            }
        }
    }

    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    string inputDatasetName = activity.Inputs.First().Name;
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == inputDatasetName);

    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    if (sourceTable == null)
    {
        throw new InvalidOperationException(
            string.Format("Input dataset '{0}' is not an Azure table dataset.", inputDataset.Name));
    }

    logger.Write("input table:{0}", sourceTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices
        .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (inputLinkedService == null)
    {
        throw new InvalidOperationException(
            string.Format(
                "Linked service '{0}' is not an Azure Storage linked service.",
                inputDataset.Properties.LinkedServiceName));
    }

    // create storage client for input. Pass the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputLinkedService.ConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    bool filterByRowKeyPrefix = rowKeyPrefixes != null && rowKeyPrefixes.Length > 0;
    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = resultSegment.Results
            .Where(entity => !filterByRowKeyPrefix || this.IsMatch(entity.RowKey, rowKeyPrefixes))
            .GroupBy(entity => entity.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();

            // Every entity of the group shares g.Key as partition key, so the value is resolved once per group.
            // Partitions that do not belong to a matching app handle are skipped.
            string newColumnValue;
            if (valueByAppHandle.TryGetValue(g.Key, out newColumnValue))
            {
                foreach (DynamicTableEntity e in g)
                {
                    if (this.ReplaceColumnValue(e, columnName, columnType, newColumnValue))
                    {
                        batch.Merge(e);
                        logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
                    }
                }
            }

            if (batch.Count > 0)
            {
                tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                actualAffectedRecords += batch.Count;
            }

            logger.Write("Updated partition: {0}", g.Key);
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    // The batch operations complete when Task.WaitAll completes
    Task.WaitAll(tasks.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
/// <summary>
/// Deletes the blobs behind the datasets listed in the "InputToDelete" extended property.
/// Extended Properties
///     InputToDelete - Names of the datasets (comma separated) whose blobs are to be deleted.
///     Empty entries and surrounding whitespace are ignored.
/// Activity Operation
///     For each listed dataset the activity deletes the blob addressed by folder path and file name and,
///     when the folder path has the form "container/directory", every block blob listed under that directory
///     prefix. The container is created first if it does not exist.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution (always an empty dictionary)</returns>
/// <remarks>Any exception is logged and then rethrown unchanged.</remarks>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    try
    {
        logger.Write("Custom Activity Started.");

        DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
        string inputToDelete = dotNetActivity.ExtendedProperties["InputToDelete"];

        // Values are always passed as arguments, never as the format string: dataset names and folder paths
        // may contain braces (e.g. "{Year}") that a formatting logger would try to interpret.
        logger.Write("\nInput to delete is {0}", inputToDelete);
        logger.Write("\nAll Dataset(s) Below ");
        foreach (Dataset ds in datasets)
        {
            logger.Write("\nDataset: {0}", ds.Name);
        }

        foreach (string name in activity.Inputs.Select(i => i.Name))
        {
            logger.Write("\nInput Dataset: {0}", name);
        }

        foreach (string name in activity.Outputs.Select(i => i.Name))
        {
            logger.Write("\nOutput Dataset: {0}", name);
        }

        string[] datasetNamesToDelete = inputToDelete
            .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(name => name.Trim())
            .Where(name => name.Length > 0)
            .ToArray();

        foreach (string datasetName in datasetNamesToDelete)
        {
            Dataset inputDataset = datasets.FirstOrDefault(ds => ds.Name == datasetName);
            if (inputDataset == null)
            {
                throw new InvalidOperationException(
                    string.Format("Dataset '{0}' listed in InputToDelete is not part of the activity datasets.", datasetName));
            }

            AzureBlobDataset blobDataset = inputDataset.Properties.TypeProperties as AzureBlobDataset;
            if (blobDataset == null)
            {
                throw new InvalidOperationException(
                    string.Format("Dataset '{0}' is not an Azure blob dataset.", datasetName));
            }

            string folderPath = blobDataset.FolderPath;
            if (string.IsNullOrEmpty(folderPath))
            {
                throw new InvalidOperationException(
                    string.Format("Dataset '{0}' has no folder path.", datasetName));
            }

            logger.Write("\nBlob folder: {0}", folderPath);
            logger.Write("\nBlob file: {0}", blobDataset.FileName);

            // linked service for input and output is the same.
            AzureStorageLinkedService linkedService = linkedServices
                .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
                .Properties.TypeProperties as AzureStorageLinkedService;
            if (linkedService == null)
            {
                throw new InvalidOperationException(
                    string.Format(
                        "Linked service '{0}' is not an Azure Storage linked service.",
                        inputDataset.Properties.LinkedServiceName));
            }

            // create storage client for input. Pass the connection string.
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(linkedService.ConnectionString);
            CloudBlobClient client = storageAccount.CreateCloudBlobClient();

            // find blob to delete and delete if exists.
            // Folder and file name are joined with exactly one '/'; without a file name the folder path itself
            // is used as the blob path.
            string blobPath = string.IsNullOrEmpty(blobDataset.FileName)
                ? folderPath
                : folderPath.TrimEnd('/') + "/" + blobDataset.FileName;
            Uri blobUri = new Uri(storageAccount.BlobEndpoint, blobPath);
            CloudBlockBlob blob = new CloudBlockBlob(blobUri, storageAccount.Credentials);
            logger.Write("Blob Uri: {0}", blobUri.AbsoluteUri);

            bool blobDeleted = blob.DeleteIfExists();
            logger.Write("Blob exists: {0}", blobDeleted);
            if (blobDeleted)
            {
                logger.Write("Deleted blob: {0}", blobUri.AbsoluteUri);
            }

            // Ensure the container exists, then remove the block blobs below the directory.
            int separatorIndex = folderPath.IndexOf('/');
            if (separatorIndex > 0)
            {
                string containerName = folderPath.Substring(0, separatorIndex);
                logger.Write("Container Name {0}", containerName);
                string directoryName = folderPath.Substring(separatorIndex + 1);
                logger.Write("Directory Name {0}", directoryName);

                var blobContainer = client.GetContainerReference(containerName);
                blobContainer.CreateIfNotExists();

                // NOTE(review): directoryName is used as a plain name prefix, so "data" presumably also matches
                // blobs under "data-archive/" - confirm this is intended before relying on it.
                foreach (IListBlobItem item in blobContainer.ListBlobs(directoryName, true))
                {
                    logger.Write("Blob Uri: {0} ", item.Uri.AbsoluteUri);
                    if (item is CloudBlockBlob)
                    {
                        CloudBlockBlob subBlob = new CloudBlockBlob(item.Uri, storageAccount.Credentials);
                        bool subBlobDeleted = subBlob.DeleteIfExists();
                        logger.Write("Blob exists: {0}", subBlobDeleted);
                        if (subBlobDeleted)
                        {
                            logger.Write("Deleted blob {0}", item.Uri.AbsoluteUri);
                        }
                    }
                }
            }
        }

        logger.Write("Custom Activity Ended Successfully.");
    }
    catch (Exception e)
    {
        logger.Write("Custom Activity Failed with error.");
        logger.Write("Caught exception: ");
        logger.Write("{0}", e.Message);

        // Rethrow the original exception so its type and stack trace are preserved.
        throw;
    }

    // The dictionary can be used to chain custom activities together in the future.
    // This feature is not implemented yet, so just return an empty dictionary.
    return new Dictionary<string, string>();
}
/// <summary>
/// Starts an existing custom C# activity locally and enables local debugging. You need to set a breakpoint in your custom component's code.
/// </summary>
/// <param name="pipelineName">The name of the pipeline which contains the custom C# activity</param>
/// <param name="activityName">The name of the activity which you want to debug</param>
/// <param name="sliceStart">SliceStart which is used when the activity is executed</param>
/// <param name="sliceEnd">SliceEnd which is used when the activity is executed</param>
/// <param name="activityLogger">The Activity Logger that is handed to the custom activity for its logging.</param>
/// <returns>The dictionary returned by the custom activity's Execute method.</returns>
/// <exception cref="KeyNotFoundException">The pipeline, or the activity inside it, was not found.</exception>
/// <exception cref="InvalidOperationException">
/// The entry point type was not found in the activity assembly or does not implement IDotNetActivity.
/// </exception>
public IDictionary<string, string> ExecuteActivity(string pipelineName, string activityName, DateTime sliceStart, DateTime sliceEnd, IActivityLogger activityLogger)
{
    Console.ForegroundColor = ConsoleColor.Yellow;
    Console.WriteLine("Debugging Custom Activity '{0}' from Pipeline '{1}' ...", activityName, pipelineName);
    Console.WriteLine("The Code from the last build of the ADF project will be used ({0}). Make sure to rebuild the ADF project if it does not reflect your latest changes!", _buildPath);

    string dependencyPath = Path.Combine(Environment.CurrentDirectory, "CustomActivityDependencies_TEMP");

    // it might happen that two activities are executed in the same run and the directory is blocked
    // the folder might not be cleaned up properly in this case but during the execution of the first activity it will
    TryDeleteDependencyDirectory(dependencyPath);

    if (!Pipelines.ContainsKey(pipelineName))
    {
        throw new KeyNotFoundException(string.Format("A pipeline with the name \"{0}\" was not found. Please check the spelling and make sure it was loaded correctly in the ADF Local Environment and see the console output", pipelineName));
    }

    // do not apply Configuration again for GetADFObjectFromJson as this would overwrite changes done by MapSlices!!!
    string loadedPipelineName = Pipelines[pipelineName].Name;
    Pipeline pipeline = (Pipeline)GetADFObjectFromJson(
        MapSlices(_armFiles.Single(x => x.Value["name"].ToString() == loadedPipelineName).Value, sliceStart, sliceEnd),
        "Pipeline",
        false);

    Activity activityMeta = pipeline.GetActivityByName(activityName);
    if (activityMeta == null)
    {
        throw new KeyNotFoundException(string.Format("An activity with the name \"{0}\" was not found in pipeline \"{1}\".", activityName, pipelineName));
    }

    // create a list of all Input- and Output-Datasets defined for the Activity
    List<Dataset> activityInputDatasets = _adfDataSets.Values.Where(adfDS => activityMeta.Inputs.Any(ds => adfDS.Name == ds.Name)).ToList();
    List<Dataset> activityOutputDatasets = _adfDataSets.Values.Where(adfDS => activityMeta.Outputs.Any(ds => adfDS.Name == ds.Name)).ToList();
    List<Dataset> activityAllDatasets = activityInputDatasets.Concat(activityOutputDatasets).ToList();
    List<LinkedService> activityLinkedServices = new List<LinkedService>();

    // apply the Slice-Settings to all relevant objects (Datasets and Activity)
    for (int i = 0; i < activityAllDatasets.Count; i++)
    {
        // MapSlices for the used Datasets
        activityAllDatasets[i] = (Dataset)GetADFObjectFromJson(MapSlices(_armFiles[activityAllDatasets[i].Name + ".json"], sliceStart, sliceEnd), "Dataset", false);

        // currently, as of 2017-01-25, the same LinkedService might get added multiple times if it is referenced by multiple datasets
        // this is the same behavior as if the activity was executed with ADF Service!!!
        string linkedServiceName = activityAllDatasets[i].Properties.LinkedServiceName;
        activityLinkedServices.Add(_adfLinkedServices.Values.Single(x => x.Name == linkedServiceName));
    }

    DotNetActivity dotNetActivityMeta = (DotNetActivity)activityMeta.TypeProperties;
    Console.WriteLine("The Custom Activity refers to the following ZIP-file: '{0}'", dotNetActivityMeta.PackageFile);
    FileInfo zipFile = _adfDependencies.Single(x => dotNetActivityMeta.PackageFile.EndsWith(x.Value.Name)).Value;
    Console.WriteLine("Using '{0}' from ZIP-file '{1}'!", dotNetActivityMeta.AssemblyName, zipFile.FullName);
    UnzipFile(zipFile, dependencyPath);

    Assembly assembly = Assembly.LoadFrom(Path.Combine(dependencyPath, dotNetActivityMeta.AssemblyName));

    // Assembly.GetType returns null for an unknown type name; report that instead of failing inside Activator.
    Type type = assembly.GetType(dotNetActivityMeta.EntryPoint);
    if (type == null)
    {
        throw new InvalidOperationException(string.Format("The entry point '{0}' was not found in assembly '{1}'.", dotNetActivityMeta.EntryPoint, dotNetActivityMeta.AssemblyName));
    }

    IDotNetActivity dotNetActivityExecute = Activator.CreateInstance(type) as IDotNetActivity;
    if (dotNetActivityExecute == null)
    {
        throw new InvalidOperationException(string.Format("The entry point '{0}' does not implement IDotNetActivity.", dotNetActivityMeta.EntryPoint));
    }

    Console.WriteLine("Executing Function '{0}'...{1}--------------------------------------------------------------------------", dotNetActivityMeta.EntryPoint, Environment.NewLine);
    Console.ForegroundColor = ConsoleColor.Gray;

    // No downcast to Dictionary: the activity may return any IDictionary implementation.
    IDictionary<string, string> ret = dotNetActivityExecute.Execute(activityLinkedServices, activityAllDatasets, activityMeta, activityLogger);

    // This might fail as the DLL is still loaded in the current Application Domain
    TryDeleteDependencyDirectory(dependencyPath);

    return ret;
}

/// <summary>
/// Best-effort removal of the temporary dependency folder; a folder that is still in use is left in place.
/// </summary>
/// <param name="path">Folder to delete recursively if it exists.</param>
private static void TryDeleteDependencyDirectory(string path)
{
    if (!Directory.Exists(path))
    {
        return;
    }

    try
    {
        Directory.Delete(path, true);
    }
    catch (UnauthorizedAccessException)
    {
        // deliberately ignored: files are locked (e.g. a loaded DLL); a later run cleans up
    }
    catch (IOException)
    {
        // deliberately ignored: the directory or one of its files is in use by another process
    }
}
/// <summary>
/// Builds the context (linked services, datasets, activity, logger) needed to run one custom activity locally
/// from the Data Factory JSON files on disk.
/// </summary>
/// <param name="pipelineFileName">File name of the pipeline JSON inside <paramref name="adfFilesPath"/>.</param>
/// <param name="activityName">Name of the activity inside the pipeline.</param>
/// <param name="configFile">
/// Optional deployment config file inside <paramref name="adfFilesPath"/>. Each top-level property is named after a
/// pipeline or linked service and lists name/value pairs, where name is a JSON path whose token is replaced by value.
/// </param>
/// <param name="adfFilesPath">Folder that contains the pipeline, dataset and linked service JSON files.</param>
/// <returns>The populated activity context.</returns>
/// <exception cref="InvalidOperationException">A JSON path from the deployment config does not exist in its target file.</exception>
/// <exception cref="Exception">
/// The activity is missing or defined more than once, or a dataset or linked service type is not supported.
/// </exception>
/// <remarks>
/// When SecurePublishSettings.json exists in the user profile folder, Key Vault references in the pipeline and
/// linked service JSON are resolved through AdfFileHelper.
/// </remarks>
public static DotNetActivityContext DeserializeActivity(string pipelineFileName, string activityName, string configFile = null, string adfFilesPath = @"..\..\..\McioppDataFactory")
{
    // Get Key Vault settings if secure publish is being used on the local machine
    AdfFileHelper adfFileHelper = null;
    string settingsFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), "SecurePublishSettings.json");
    if (File.Exists(settingsFile))
    {
        AppSettings settings = JsonConvert.DeserializeObject<AppSettings>(File.ReadAllText(settingsFile));
        X509Certificate2 cert = KeyVaultResolver.FindCertificateByThumbprint(settings.KeyVaultCertThumbprint);
        string suffix = settings.EnvironmentSettings.First().KeyVaultDnsSuffix;
        suffix = string.IsNullOrEmpty(suffix) ? "vault.azure.net:443" : suffix;
        KeyVaultResolver keyVaultResolver = new KeyVaultResolver(settings.EnvironmentSettings.First().KeyVaultName, suffix, settings.KeyVaultCertClientId, cert);
        adfFileHelper = new AdfFileHelper(keyVaultResolver, new Logger());
    }

    adfFilesPath = Path.GetFullPath(adfFilesPath);

    var deploymentDict = new Dictionary<string, Dictionary<string, string>>();
    if (!string.IsNullOrEmpty(configFile))
    {
        // Get deployment config
        string deploymentConfigPath = Path.Combine(adfFilesPath, configFile);
        var deploymentConfigJson = File.ReadAllText(deploymentConfigPath);
        var deploymentJObj = JObject.Parse(deploymentConfigJson);
        deploymentDict = deploymentJObj.Properties()
            .ToDictionary(x => x.Name, y => y.Value.ToDictionary(z => z["name"].ToString(), z => z["value"].ToString()));
    }

    DotNetActivityContext context = new DotNetActivityContext
    {
        LinkedServices = new List<LinkedService>(),
        Datasets = new List<Dataset>(),
        Activity = new Activity(),
        Logger = new ActivityLogger()
    };

    string pipelinePath = Path.Combine(adfFilesPath, pipelineFileName);
    string pipelineJson = File.ReadAllText(pipelinePath);
    string pipelineName = Path.GetFileNameWithoutExtension(pipelineFileName);

    // Update with values from deployment config if exists
    Dictionary<string, string> pipelineOverrides;
    if (deploymentDict.TryGetValue(pipelineName, out pipelineOverrides))
    {
        JObject pipelineJObject = JObject.Parse(pipelineJson);
        ApplyDeploymentOverrides(pipelineJObject, pipelineOverrides, pipelinePath);
        pipelineJson = pipelineJObject.ToString();
    }

    // Search for Key Vault references in the pipeline and replace with their Key Vault equivalents if found
    if (adfFileHelper != null)
    {
        pipelineJson = adfFileHelper.ResolveKeyVault(pipelineJson).Result;
    }

    var dummyPipeline = JsonConvert.DeserializeObject<Models.Pipeline>(pipelineJson);

    // Distinguish "missing" from "duplicated" instead of reporting both as "not found".
    var matchingActivities = dummyPipeline.Properties.Activities.Where(x => x.Name == activityName).ToList();
    if (matchingActivities.Count == 0)
    {
        throw new Exception($"Activity {activityName} not found in {pipelinePath}.");
    }

    if (matchingActivities.Count > 1)
    {
        throw new Exception($"Activity {activityName} is defined more than once in {pipelinePath}.");
    }

    Models.Activity dummyActivity = matchingActivities[0];

    context.Activity.Name = dummyActivity.Name;
    context.Activity.TypeProperties = new DotNetActivity();
    DotNetActivity dotNetActivity = (DotNetActivity)context.Activity.TypeProperties;
    dotNetActivity.ExtendedProperties = dummyActivity.DotNetActivityTypeProperties.ExtendedProperties;

    // get the input and output tables
    var dummyDatasets = new HashSet<Models.ActivityData>();
    if (dummyActivity.Inputs != null)
    {
        dummyDatasets.UnionWith(dummyActivity.Inputs);
    }

    if (dummyActivity.Outputs != null)
    {
        dummyDatasets.UnionWith(dummyActivity.Outputs);
    }

    var dummyServices = new HashSet<Models.LinkedService>();

    // init the data tables
    foreach (var dummyDataset in dummyDatasets)
    {
        // parse the table json source
        var dataPath = Path.Combine(adfFilesPath, dummyDataset.Name + ".json");
        var dataJson = File.ReadAllText(dataPath);
        var dummyTable = JsonConvert.DeserializeObject<Models.Table>(dataJson);

        // initialize dataset properties
        DatasetTypeProperties datasetProperties;
        switch (dummyTable.Properties.Type)
        {
            case "AzureBlob":
                // init the azure model
                datasetProperties = new AzureBlobDataset
                {
                    FolderPath = dummyTable.Properties.TypeProperties.FolderPath,
                    FileName = dummyTable.Properties.TypeProperties.FileName
                };
                break;
            case "AzureTable":
            case "AzureSqlTable":
                // both types are represented by an AzureTableDataset that only carries the table name
                datasetProperties = new AzureTableDataset
                {
                    TableName = dummyTable.Properties.TypeProperties.TableName
                };
                break;
            case "SqlServerTable":
                datasetProperties = new SqlServerTableDataset(dummyTable.Properties.TypeProperties.TableName);
                break;
            default:
                throw new Exception($"Unexpected Dataset.Type {dummyTable.Properties.Type}");
        }

        // initialize dataset
        var dataDataset = new Dataset(
            dummyDataset.Name,
            new DatasetProperties(
                datasetProperties,
                new Availability(),
                string.Empty));
        dataDataset.Properties.LinkedServiceName = dummyTable.Properties.LinkedServiceName;
        context.Datasets.Add(dataDataset);

        // register the inputs and outputs in the activity
        if (dummyDataset is Models.ActivityInput)
        {
            context.Activity.Inputs.Add(new ActivityInput(dummyDataset.Name));
        }

        if (dummyDataset is Models.ActivityOutput)
        {
            context.Activity.Outputs.Add(new ActivityOutput(dummyDataset.Name));
        }

        // parse the linked service json source for later use
        string linkedServiceName = dummyTable.Properties.LinkedServiceName;
        var servicePath = Path.Combine(adfFilesPath, linkedServiceName + ".json");
        string serviceJson = File.ReadAllText(servicePath);

        // The type is read before any deployment override is applied.
        JObject serviceJObject = JObject.Parse(serviceJson);
        string linkedServiceType = serviceJObject["properties"]["type"].ToObject<string>();

        // Update with values from deployment config if exists
        Dictionary<string, string> serviceOverrides;
        if (deploymentDict.TryGetValue(linkedServiceName, out serviceOverrides))
        {
            ApplyDeploymentOverrides(serviceJObject, serviceOverrides, servicePath);
            serviceJson = serviceJObject.ToString();
        }

        // Search for Key Vault references in the linked service and replace with their Key Vault equivalents if found
        if (adfFileHelper != null)
        {
            serviceJson = adfFileHelper.ResolveKeyVault(serviceJson).Result;
        }

        Models.LinkedService storageService;
        switch (linkedServiceType)
        {
            case "AzureSqlDatabase":
            case "OnPremisesSqlServer":
                storageService = JsonConvert.DeserializeObject<Models.AzureSqlDatabaseLinkedService>(serviceJson);
                break;
            case "AzureStorage":
                storageService = JsonConvert.DeserializeObject<Models.StorageService>(serviceJson);
                break;
            default:
                throw new Exception($"Mapper for linked service type '{linkedServiceType}' not found.");
        }

        dummyServices.Add(storageService);
    }

    // parse the hd insight service json source
    var computeServicePath = Path.Combine(adfFilesPath, dummyActivity.LinkedServiceName + ".json");
    var computeServiceJson = File.ReadAllText(computeServicePath);
    var computeService = JsonConvert.DeserializeObject<Models.ComputeService>(computeServiceJson);
    dummyServices.Add(computeService);

    // init the services
    foreach (var dummyService in dummyServices)
    {
        LinkedService linkedService = null;

        // init if it is a storage service
        var dummyStorageService = dummyService as Models.StorageService;
        if (dummyStorageService != null)
        {
            var service = new AzureStorageLinkedService
            {
                ConnectionString = dummyStorageService.Properties.TypeProperties.ConnectionString
            };
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        // init if it is a AzureSqlDatabase service
        var dummySqlService = dummyService as Models.AzureSqlDatabaseLinkedService;
        if (dummySqlService != null)
        {
            var service = new AzureSqlDatabaseLinkedService()
            {
                ConnectionString = dummySqlService.Properties.TypeProperties.ConnectionString
            };
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        // init if it is a hd insight service
        if (dummyService is Models.ComputeService)
        {
            var service = new HDInsightLinkedService();
            linkedService = new LinkedService(dummyService.Name, new LinkedServiceProperties(service));
        }

        context.LinkedServices.Add(linkedService);
    }

    return context;
}

/// <summary>
/// Replaces the tokens addressed by the JSON paths in <paramref name="overrides"/> with the configured values.
/// </summary>
/// <param name="target">Parsed JSON document that is modified in place.</param>
/// <param name="overrides">JSON path to replacement value.</param>
/// <param name="sourcePath">File the document was read from; only used in the error message.</param>
/// <exception cref="InvalidOperationException">A JSON path does not exist in <paramref name="target"/>.</exception>
private static void ApplyDeploymentOverrides(JObject target, IDictionary<string, string> overrides, string sourcePath)
{
    foreach (KeyValuePair<string, string> pair in overrides)
    {
        // SelectToken returns null when the path matches nothing; fail with the offending path
        // instead of a NullReferenceException.
        JToken token = target.SelectToken(pair.Key);
        if (token == null)
        {
            throw new InvalidOperationException($"Deployment config path '{pair.Key}' was not found in {sourcePath}.");
        }

        token.Replace(pair.Value);
    }
}
/// <summary>
/// Deletes a column from an Azure table.
/// The table for the column is same as the input table from the dataset
/// The column to be deleted is specified using the following extended properties
/// Extended Properties
///     columnName - Name of the column to be deleted
///     rowKeyPrefix - Rowkey prefix of the row from which the column will be deleted. This is optional and will
///     identify the subset of rows to do this operation. The prefix is compared ordinally (case-sensitive).
///     columnName is mandatory.
/// Extended Properties Example
///     "columnName": "UseDefault",
///     "rowKeyPrefix": "IdentityCredentialsObject:"
/// Activity Operation
///     The activity iterates through all the rows from the input table with the matching rowKeyPrefix,
///     checks for the column, removes the column if found and runs a replace table operation to replace the contents of
///     row/entity in the table.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution (always an empty dictionary)</returns>
/// <exception cref="ArgumentException">The columnName extended property is missing.</exception>
/// <exception cref="InvalidOperationException">
/// The first input dataset is not an Azure table dataset, or its linked service is not an Azure Storage linked service.
/// </exception>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    string columnName;
    if (!extendedProperties.TryGetValue("columnName", out columnName))
    {
        throw new ArgumentException("Column name is required", "columnName");
    }

    string rowKeyPrefix;
    if (!extendedProperties.TryGetValue("rowKeyPrefix", out rowKeyPrefix))
    {
        rowKeyPrefix = string.Empty;
    }

    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    string inputDatasetName = activity.Inputs.First().Name;
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == inputDatasetName);

    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    if (sourceTable == null)
    {
        throw new InvalidOperationException(
            string.Format("Input dataset '{0}' is not an Azure table dataset.", inputDataset.Name));
    }

    logger.Write("input table:{0}", sourceTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices
        .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (inputLinkedService == null)
    {
        throw new InvalidOperationException(
            string.Format(
                "Linked service '{0}' is not an Azure Storage linked service.",
                inputDataset.Properties.LinkedServiceName));
    }

    // create storage client for input. Pass the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputLinkedService.ConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    bool filterByRowKeyPrefix = !string.IsNullOrWhiteSpace(rowKeyPrefix);
    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        // Row keys are identifiers, so the prefix is matched ordinally rather than with the current culture.
        var partitionGroups = resultSegment.Results
            .Where(entity => !filterByRowKeyPrefix || entity.RowKey.StartsWith(rowKeyPrefix, StringComparison.Ordinal))
            .GroupBy(entity => entity.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g)
            {
                // If the columnName exist in the properties, then Remove it
                if (e.Properties.Remove(columnName))
                {
                    batch.Replace(e);
                    logger.Write("<partition key:{0}>, <row key:{1}> added to batch", e.PartitionKey, e.RowKey);
                }
            }

            if (batch.Count > 0)
            {
                tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                actualAffectedRecords += batch.Count;
            }
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    // The batch operations complete when Task.WaitAll completes
    Task.WaitAll(tasks.ToArray());
    logger.Write("Deleted column from {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
/// <summary>
/// Reads the ML endpoint settings and the input/output blob locations from the activity definition,
/// stores them in the instance fields and invokes the ML batch execution service.
/// Extended Properties
///     SliceStart - Only logged; may be omitted.
///     baseUrl - Base URL of the ML model. Required, must not be empty.
///     apiKey - API key of the ML model. Required, must not be empty.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition; must have exactly one input and one output.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution (always an empty dictionary)</returns>
/// <exception cref="Exception">
/// A required setting, connection string or folder path is missing or empty, or a linked service is not an
/// Azure Storage linked service. Failures of the batch execution call are logged and rethrown.
/// </exception>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    _logger = logger;
    _logger.Write("######Execute Begin######");

    // to get extended properties (for example: SliceStart)
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    // TryGetValue is used throughout so that a missing key reaches the validation below
    // instead of failing with an uninformative KeyNotFoundException.
    string sliceStartTime;
    extendedProperties.TryGetValue("SliceStart", out sliceStartTime);
    _logger.Write("Slice start time is : {0}", sliceStartTime);

    string baseUrl;
    extendedProperties.TryGetValue("baseUrl", out baseUrl);
    _baseUrl = baseUrl;
    if (String.IsNullOrEmpty(_baseUrl))
    {
        _logger.Write("Null or Empty Base URL for ML Model: {0}", _baseUrl);
        throw new Exception(string.Format("Null or Empty Base URL for ML Model: {0}", _baseUrl));
    }

    _logger.Write("Base ML Azure Website url is : {0}", _baseUrl);

    string apiKey;
    extendedProperties.TryGetValue("apiKey", out apiKey);
    _apiKey = apiKey;
    if (String.IsNullOrEmpty(_apiKey))
    {
        _logger.Write("Null or Empty API Key for ML Model: {0}", _apiKey);
        throw new Exception(string.Format("Null or Empty API Key for ML Model: {0}", _apiKey));
    }

    // Get the ADF Input Tables
    string inputDatasetName = activity.Inputs.Single().Name;
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == inputDatasetName);
    AzureStorageLinkedService inputLinkedService = linkedServices
        .Single(linkedService => linkedService.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (inputLinkedService == null)
    {
        _logger.Write("Linked service for input table {0} is not an Azure Storage linked service", inputDataset.Name);
        throw new Exception(string.Format("Linked service for input table {0} is not an Azure Storage linked service", inputDataset.Name));
    }

    _storageConnectionString = inputLinkedService.ConnectionString;
    if (String.IsNullOrEmpty(_storageConnectionString))
    {
        _logger.Write("Null or Empty Connection string for input table: {0}", inputDataset.Name);
        throw new Exception(string.Format("Null or Empty Connection string for input table: {0}", inputDataset.Name));
    }

    string folderPath = GetFolderPath(inputDataset);
    if (String.IsNullOrEmpty(folderPath))
    {
        _logger.Write("Null or Empty folderpath for input table: {0}", inputDataset.Name);
        throw new Exception(string.Format("Null or Empty folder path for input table: {0}", inputDataset.Name));
    }

    // The first path segment is the container, the remainder is the blob name.
    _storageContainerName = folderPath.Split('/')[0];
    _inputBlobName = folderPath.Substring(folderPath.IndexOf('/') + 1);
    _logger.Write("Folder Path for Input Table {0}: {1}", inputDataset.Name, folderPath);

    // Get the ADF Output Tables
    string outputDatasetName = activity.Outputs.Single().Name;
    Dataset outputDataset = datasets.Single(dataset => dataset.Name == outputDatasetName);
    AzureStorageLinkedService outputLinkedService = linkedServices
        .Single(linkedService => linkedService.Name == outputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (outputLinkedService == null)
    {
        _logger.Write("Linked service for output table {0} is not an Azure Storage linked service", outputDataset.Name);
        throw new Exception(string.Format("Linked service for output table {0} is not an Azure Storage linked service", outputDataset.Name));
    }

    // NOTE(review): this overwrites the input connection string, while the container name above still comes
    // from the input folder path - presumably input and output share one storage account; confirm.
    _storageConnectionString = outputLinkedService.ConnectionString;
    folderPath = GetFolderPath(outputDataset);
    if (String.IsNullOrEmpty(_storageConnectionString))
    {
        _logger.Write("Null or Empty Connection string for output table: {0}", outputDataset.Name);
        throw new Exception(string.Format("Null or Empty Connection string for output table: {0}", outputDataset.Name));
    }

    if (String.IsNullOrEmpty(folderPath))
    {
        _logger.Write("Null or Empty folderpath for output table: {0}", outputDataset.Name);
        throw new Exception(string.Format("Null or Empty folder path for output table: {0}", outputDataset.Name));
    }

    _outputBlobName = folderPath.Substring(folderPath.IndexOf('/') + 1);
    _logger.Write("Folder Path for Ouput Table {0}: {1}", outputDataset.Name, folderPath);

    try
    {
        // Invoke ML Batch Execution Service
        InvokeBatchExecutionService().Wait();
    }
    catch (Exception ex)
    {
        _logger.Write("ML Model Call failed with error : {0}", ex.ToString());
        throw;
    }

    return new Dictionary<string, string>();
}
/// <summary>
/// Method copies an Azure table.
/// The table to be copied is the same as the input table from the dataset.
/// Extended Properties
///     ignore - Names of the columns (comma separated) to be ignored as part of the copy operation.
///              This is an optional parameter. Whitespace around each name and empty entries are ignored.
/// Activity Operation
///     The activity iterates through all the rows from the input table,
///     removes the ignored columns if found, and
///     runs an InsertOrReplace table operation to insert or replace the contents of
///     the row/entity in the output table. The output table is created if it does not exist.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution (an empty dictionary).</returns>
/// <exception cref="InvalidOperationException">
/// The input or output dataset is not an Azure table dataset, or its linked service is not
/// an Azure Storage linked service.
/// </exception>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;
    string[] columnsToIgnore = null;

    // extendedProperties are optional for this activity
    if (extendedProperties != null)
    {
        logger.Write("Logging extended properties if any...");
        foreach (KeyValuePair<string, string> entry in extendedProperties)
        {
            logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
        }

        string ignoreValue;
        if (extendedProperties.TryGetValue("ignore", out ignoreValue) && ignoreValue != null)
        {
            // Trim each name so that "a, b" removes column "b" as well; table property
            // names should follow C# identifier rules, so a padded name cannot match a
            // well-formed column.
            columnsToIgnore = ignoreValue
                .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(columnName => columnName.Trim())
                .Where(columnName => columnName.Length > 0)
                .ToArray();
        }
    }

    // For activities working on a single dataset, the first entry is the input dataset.
    // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies.
    // We can ignore the rest of the datasets.
    string inputDatasetName = activity.Inputs.First().Name;
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == inputDatasetName);
    AzureTableDataset sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    if (sourceTable == null)
    {
        // "as" yields null for any other dataset type; report it instead of a NullReferenceException.
        throw new InvalidOperationException(
            string.Format("Input dataset {0} is not an Azure table dataset", inputDataset.Name));
    }

    logger.Write("input table:{0}", sourceTable.TableName);

    string outputDatasetName = activity.Outputs.Single().Name;
    Dataset outputDataset = datasets.Single(dataset => dataset.Name == outputDatasetName);
    AzureTableDataset destinationTable = outputDataset.Properties.TypeProperties as AzureTableDataset;
    if (destinationTable == null)
    {
        throw new InvalidOperationException(
            string.Format("Output dataset {0} is not an Azure table dataset", outputDataset.Name));
    }

    logger.Write("output table:{0}", destinationTable.TableName);

    AzureStorageLinkedService inputLinkedService = linkedServices
        .First(ls => ls.Name == inputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (inputLinkedService == null)
    {
        throw new InvalidOperationException(
            string.Format("Linked service for input dataset {0} is not an Azure Storage linked service", inputDataset.Name));
    }

    string inputConnectionString = inputLinkedService.ConnectionString;

    AzureStorageLinkedService outputLinkedService = linkedServices
        .First(ls => ls.Name == outputDataset.Properties.LinkedServiceName)
        .Properties.TypeProperties as AzureStorageLinkedService;
    if (outputLinkedService == null)
    {
        throw new InvalidOperationException(
            string.Format("Linked service for output dataset {0} is not an Azure Storage linked service", outputDataset.Name));
    }

    string outputConnectionString = outputLinkedService.ConnectionString;

    // create storage client for input. Pass the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(outputConnectionString);
    CloudTableClient outputTableClient = outputStorageAccount.CreateCloudTableClient();
    CloudTable outputTable = outputTableClient.GetTableReference(destinationTable.TableName);

    if (!outputTable.Exists())
    {
        outputTable.Create();
    }

    long totalProcessedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        // One batch per partition key within the current segment.
        foreach (IGrouping<string, DynamicTableEntity> g in resultSegment.Results.GroupBy(a => a.PartitionKey))
        {
            TableBatchOperation batch = new TableBatchOperation();

            foreach (DynamicTableEntity e in g)
            {
                if (columnsToIgnore != null)
                {
                    foreach (string column in columnsToIgnore)
                    {
                        // Remove is a no-op when the column is absent.
                        e.Properties.Remove(column);
                    }
                }

                batch.InsertOrReplace(e);
                logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
            }

            if (batch.Count > 0)
            {
                tasks.Add(outputTable.ExecuteBatchInChunkAsync(batch));
            }

            logger.Write("Copied data for partition: {0}", g.Key);
        }

        // In case of Copy, number of processed and affected records is the same
        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
    }
    while (tableContinuationToken != null);

    // The batch operations complete when Task.WaitAll completes
    Task.WaitAll(tasks.ToArray());
    logger.Write("Copied {0} records from {1} to {2}", totalProcessedRecords, sourceTable.TableName, destinationTable.TableName);

    return new Dictionary<string, string>();
}