/// <summary>
/// Transforms the values of a column in an Azure table. The column may be a normal column or the RowKey column,
/// but cannot be the PartitionKey column.
/// The column to be transformed is specified using the following extended properties:
///   columnName - Name of the column to be transformed.
///   columnType - Data type of the column. The only supported types right now are: int32, bool, and string.
///   ifColumnValueMatches - The transformation is applied only if the column value matches the specified value.
///   replaceColumnValueWith - Replaces the matched column value with the specified value.
///   ifRowKeyContains - The transformation is applied only if the row key contains the specified value.
///   replaceRowKeySubStrWith - Replaces the matched substring of the row key with the specified value to generate a new row key.
///   rowKeyPrefixes - Row key prefixes of the rows to which the column transformation is applied.
///                    This is optional and identifies the subset of rows on which to perform the operation.
/// You can specify columnName/columnType/ifColumnValueMatches/replaceColumnValueWith, or
/// ifRowKeyContains/replaceRowKeySubStrWith, or both, as they operate on different columns.
/// Extended properties example:
///   "columnName": "IdentityProviderType",
///   "columnType": "string",
///   "ifColumnValueMatches": "Beihai",
///   "replaceColumnValueWith": "AADS2S",
///   "ifRowKeyContains": "Beihai",
///   "replaceRowKeySubStrWith": "AADS2S"
/// Activity operation:
/// The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
/// applies the column transformation if a matching column value is found,
/// applies the row key transformation if a matching row key is found,
/// runs a replace table operation when only the column is transformed, and
/// runs a delete + insert operation when the row key is transformed (the row key is part of the
/// entity key and cannot be updated in place).
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Microsoft.Azure.Management.DataFactories.Models.Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    string[] rowKeyPrefixes = null;
    if (extendedProperties.ContainsKey("rowKeyPrefixes"))
    {
        rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
    }

    bool hasColumnUpdate = false;
    string columnName = string.Empty, columnType = string.Empty, ifColumnValueMatches = string.Empty, replaceColumnValueWith = string.Empty;
    if (extendedProperties.ContainsKey("columnName"))
    {
        columnName = extendedProperties["columnName"];
        columnType = extendedProperties["columnType"];
        ifColumnValueMatches = extendedProperties["ifColumnValueMatches"];
        replaceColumnValueWith = extendedProperties["replaceColumnValueWith"];
        hasColumnUpdate = true;
    }

    bool hasRowKeyUpdate = false;
    string ifRowKeyContains = string.Empty, replaceRowKeySubStrWith = string.Empty;
    if (extendedProperties.ContainsKey("ifRowKeyContains"))
    {
        ifRowKeyContains = extendedProperties["ifRowKeyContains"];
        replaceRowKeySubStrWith = extendedProperties["replaceRowKeySubStrWith"];
        hasRowKeyUpdate = true;
    }

    AzureStorageLinkedService inputLinkedService;
    AzureTableDataset sourceTable;

    // For activities working on a single dataset, the first entry is the input dataset.
    // activity.Inputs can have multiple datasets for building pipeline workflow dependencies; we can ignore the rest.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    // Create the storage client for the input using the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = (from s in resultSegment.Results
                               where rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0 || this.IsMatch(s.RowKey, rowKeyPrefixes)
                               select s).GroupBy(a => a.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g.AsEnumerable())
            {
                // Cache the original row key and properties so the old entity can be deleted if the row key changes.
                string cachedRowkey = e.RowKey;
                IDictionary<string, EntityProperty> cachedProperties = new Dictionary<string, EntityProperty>();
                foreach (KeyValuePair<string, EntityProperty> p in e.Properties)
                {
                    cachedProperties.Add(p);
                }

                bool recordUpdated = false, requiresDelete = false;
                if (hasColumnUpdate)
                {
                    recordUpdated = this.ReplaceIfMatch(e, columnName, columnType, ifColumnValueMatches, replaceColumnValueWith);
                }

                if (hasRowKeyUpdate && e.RowKey.Contains(ifRowKeyContains))
                {
                    e.RowKey = e.RowKey.Replace(ifRowKeyContains, replaceRowKeySubStrWith);
                    recordUpdated = true;
                    requiresDelete = true;
                }

                if (recordUpdated)
                {
                    if (!requiresDelete)
                    {
                        batch.Replace(e);
                    }
                    else
                    {
                        batch.Insert(e);
                        batch.Delete(new DynamicTableEntity(e.PartitionKey, cachedRowkey, "*", cachedProperties));
                    }

                    actualAffectedRecords++;
                    logger.Write("<partition key:{0}>, <row key:{1}> added to batch", e.PartitionKey, e.RowKey);
                }
            }

            if (batch.Count > 0)
            {
                tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
            }

            logger.Write("Updated partition: {0}", g.Key);
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    Task.WaitAll(tasks.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
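// The IsMatch and ReplaceIfMatch helpers referenced above are not shown in this listing.
// A minimal sketch of what they might look like, assuming IsMatch tests row key prefixes with
// StartsWith and ReplaceIfMatch compares the typed column value against the match string for the
// three supported column types (int32, bool, string):
private bool IsMatch(string rowKey, string[] rowKeyPrefixes)
{
    // True when the row key starts with any of the configured prefixes.
    return rowKeyPrefixes.Any(prefix => rowKey.StartsWith(prefix, StringComparison.Ordinal));
}

private bool ReplaceIfMatch(DynamicTableEntity e, string columnName, string columnType, string ifColumnValueMatches, string replaceColumnValueWith)
{
    // Entities that do not carry the column are left untouched.
    if (!e.Properties.ContainsKey(columnName))
    {
        return false;
    }

    switch (columnType.ToLowerInvariant())
    {
        case "int32":
            if (e.Properties[columnName].Int32Value == int.Parse(ifColumnValueMatches))
            {
                e.Properties[columnName] = new EntityProperty(int.Parse(replaceColumnValueWith));
                return true;
            }

            break;
        case "bool":
            if (e.Properties[columnName].BooleanValue == bool.Parse(ifColumnValueMatches))
            {
                e.Properties[columnName] = new EntityProperty(bool.Parse(replaceColumnValueWith));
                return true;
            }

            break;
        case "string":
            if (string.Equals(e.Properties[columnName].StringValue, ifColumnValueMatches, StringComparison.Ordinal))
            {
                e.Properties[columnName] = new EntityProperty(replaceColumnValueWith);
                return true;
            }

            break;
    }

    return false;
}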
/// <summary>
/// Deletes a column from an Azure table.
/// The table is the same as the input table from the dataset.
/// The column to be deleted is specified using the following extended properties:
///   columnName - Name of the column to be deleted. This property is mandatory.
///   rowKeyPrefix - Row key prefix of the rows from which the column will be deleted.
///                  This is optional and identifies the subset of rows on which to perform the operation.
/// Extended properties example:
///   "columnName": "UseDefault",
///   "rowKeyPrefix": "IdentityCredentialsObject:"
/// Activity operation:
/// The activity iterates through all the rows from the input table with the matching rowKeyPrefix,
/// checks for the column, removes the column if found, and runs a replace table operation to replace
/// the contents of the row/entity in the table.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    if (!extendedProperties.ContainsKey("columnName"))
    {
        throw new ArgumentException("Column name is required", "columnName");
    }

    string columnName = extendedProperties["columnName"];

    string rowKeyPrefix = string.Empty;
    if (extendedProperties.ContainsKey("rowKeyPrefix"))
    {
        rowKeyPrefix = extendedProperties["rowKeyPrefix"];
    }

    AzureStorageLinkedService inputLinkedService;
    AzureTableDataset sourceTable;

    // For activities working on a single dataset, the first entry is the input dataset.
    // activity.Inputs can have multiple datasets for building pipeline workflow dependencies; we can ignore the rest.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    // Create the storage client for the input using the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = (from s in resultSegment.Results
                               where string.IsNullOrWhiteSpace(rowKeyPrefix) || s.RowKey.StartsWith(rowKeyPrefix)
                               select s).GroupBy(a => a.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g.AsEnumerable())
            {
                // If the column exists in the entity's properties, remove it.
                if (e.Properties.ContainsKey(columnName))
                {
                    e.Properties.Remove(columnName);
                    batch.Replace(e);
                    logger.Write("<partition key:{0}>, <row key:{1}> added to batch", e.PartitionKey, e.RowKey);
                }
            }

            if (batch.Count > 0)
            {
                tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                actualAffectedRecords += batch.Count;
            }
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    // The batch operations complete when Task.WaitAll returns.
    Task.WaitAll(tasks.ToArray());
    logger.Write("Deleted column from {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
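// ExecuteBatchInChunkAsync is used throughout these activities but is not shown in this listing.
// A minimal sketch, assuming it is an extension method that works around the Azure Table service
// limit of 100 operations per batch by splitting a large batch into chunks and executing them in order:
public static class CloudTableExtensions
{
    private const int MaxBatchSize = 100; // Azure Table batch operations are capped at 100 entities per batch.

    public static async Task ExecuteBatchInChunkAsync(this CloudTable table, TableBatchOperation batch)
    {
        // All operations in a TableBatchOperation already target a single partition,
        // so chunking by count is sufficient here.
        TableBatchOperation chunk = new TableBatchOperation();
        foreach (TableOperation operation in batch)
        {
            chunk.Add(operation);
            if (chunk.Count == MaxBatchSize)
            {
                await table.ExecuteBatchAsync(chunk);
                chunk = new TableBatchOperation();
            }
        }

        if (chunk.Count > 0)
        {
            await table.ExecuteBatchAsync(chunk);
        }
    }
}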
/// <summary>
/// Updates a column value in the Apps Azure table.
/// The column to be updated is specified using the following extended properties:
///   columnName - Name of the column to be updated.
///   columnType - Data type of the column. The only supported types right now are: int32, bool, and string.
///   rowKeyPrefixes - Row key prefixes of the rows to which the column update is applied.
///                    This is optional and identifies the subset of rows on which to perform the operation.
///   partitionKeyOwnerValueRule - The updates, specified as semicolon-separated key=value pairs that map
///                                a partition key owner to the new column value.
/// Extended properties example:
///   "columnName": "DisableHandleValidation",
///   "columnType": "bool",
///   "rowKeyPrefixes": "ProfilesObject:",
///   "partitionKeyOwnerValueRule": "Beihai=true;EndToEndTests=true"
/// Activity operation:
/// The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
/// checks if the column is present, and updates the column value if the partition key belongs to the
/// app handles associated with the owner specified in partitionKeyOwnerValueRule.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    if (!extendedProperties.ContainsKey("columnName"))
    {
        throw new ArgumentException("Column name is required", "columnName");
    }

    string columnName = extendedProperties["columnName"];

    if (!extendedProperties.ContainsKey("columnType"))
    {
        throw new ArgumentException("Column Type information is required", "columnType");
    }

    string columnType = extendedProperties["columnType"];

    // Note that partitionKeyOwnerValueRule is required because the update rules come from it.
    // We do not fall back to a default value when no matching rule is found; the record is ignored.
    // All rules need to be explicitly specified.
    if (!extendedProperties.ContainsKey("partitionKeyOwnerValueRule"))
    {
        throw new ArgumentException("PartitionKeyOwnerValueRule information is required", "partitionKeyOwnerValueRule");
    }

    string partitionKeyOwnerValueRule = extendedProperties["partitionKeyOwnerValueRule"];

    string[] rowKeyPrefixes = null;
    if (extendedProperties.ContainsKey("rowKeyPrefixes"))
    {
        rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
    }

    var partitionKeyOwnerValueRuleDict = partitionKeyOwnerValueRule
        .Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(part => part.Split('='))
        .ToDictionary(split => split[0], split => split[1]);

    var appHandles = ownerAppHandles
        .Where(item => partitionKeyOwnerValueRuleDict.ContainsKey(item.Key))
        .SelectMany(item => item.Value)
        .ToList();
    logger.Write("Matching appHandles:{0}", string.Join(",", appHandles));

    AzureStorageLinkedService inputLinkedService;
    AzureTableDataset sourceTable;

    // For activities working on a single dataset, the first entry is the input dataset.
    // activity.Inputs can have multiple datasets for building pipeline workflow dependencies; we can ignore the rest.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    // Create the storage client for the input using the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = (from s in resultSegment.Results
                               where rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0 || this.IsMatch(s.RowKey, rowKeyPrefixes)
                               select s).GroupBy(a => a.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g.AsEnumerable())
            {
                // Skip entities whose partition key is not one of the matching app handles.
                if (!appHandles.Contains(e.PartitionKey))
                {
                    continue;
                }

                // Pick the value to use for this app handle: resolve the owner key from e.PartitionKey,
                // then use the owner key to look up the new column value.
                var ownerKey = ownerAppHandles.FirstOrDefault(x => x.Value.Contains(e.PartitionKey)).Key;
                string newColumnValue = partitionKeyOwnerValueRuleDict[ownerKey];
                if (this.ReplaceColumnValue(e, columnName, columnType, newColumnValue))
                {
                    batch.Merge(e);
                    logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
                }
            }

            if (batch.Count > 0)
            {
                tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                actualAffectedRecords += batch.Count;
            }

            logger.Write("Updated partition: {0}", g.Key);
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    Task.WaitAll(tasks.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
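// The ownerAppHandles lookup and the ReplaceColumnValue helper referenced above are not shown in this
// listing. A minimal sketch, assuming ownerAppHandles maps an owner (the key used in
// partitionKeyOwnerValueRule) to the app handles that appear as partition keys, and that
// ReplaceColumnValue overwrites the typed column value unconditionally:
private static readonly Dictionary<string, List<string>> ownerAppHandles = new Dictionary<string, List<string>>
{
    { "Beihai", new List<string> { /* app handles owned by Beihai */ } },
    { "EndToEndTests", new List<string> { /* app handles owned by the end-to-end tests */ } }
};

private bool ReplaceColumnValue(DynamicTableEntity e, string columnName, string columnType, string newColumnValue)
{
    // Only update entities that already carry the column; records without it are ignored.
    if (!e.Properties.ContainsKey(columnName))
    {
        return false;
    }

    switch (columnType.ToLowerInvariant())
    {
        case "int32":
            e.Properties[columnName] = new EntityProperty(int.Parse(newColumnValue));
            return true;
        case "bool":
            e.Properties[columnName] = new EntityProperty(bool.Parse(newColumnValue));
            return true;
        case "string":
            e.Properties[columnName] = new EntityProperty(newColumnValue);
            return true;
        default:
            return false;
    }
}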
/// <summary>
/// Transforms an Azure table partition key.
/// The partition key to be transformed is specified using the following extended properties:
///   ifPartitionKeyContains - The transformation is applied only if the partition key contains the specified value. Mandatory.
///   replacePartitionKeySubStrWith - Replaces the matched substring of the partition key with the specified value
///                                   to generate a new partition key. Mandatory.
///   rowKeyPrefixes - Row key prefixes of the rows to which the partition key transformation is applied.
///                    This is optional and identifies the subset of rows on which to perform the operation.
/// Extended properties example:
///   "ifPartitionKeyContains": "Beihai",
///   "replacePartitionKeySubStrWith": "AADS2S"
/// Activity operation:
/// The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
/// applies the partition key transformation if a partition key match is found, then
/// runs an insert operation for the entities with the new partition key and a delete operation on the
/// existing entities with the matching partition keys.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Microsoft.Azure.Management.DataFactories.Models.Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    logger.Write("Logging extended properties if any...");
    foreach (KeyValuePair<string, string> entry in extendedProperties)
    {
        logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
    }

    string[] rowKeyPrefixes = null;
    if (extendedProperties.ContainsKey("rowKeyPrefixes"))
    {
        rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
    }

    if (!extendedProperties.ContainsKey("ifPartitionKeyContains"))
    {
        throw new ArgumentException("Partition key match criteria is required", "ifPartitionKeyContains");
    }

    if (!extendedProperties.ContainsKey("replacePartitionKeySubStrWith"))
    {
        throw new ArgumentException("Partition key substring replacement value is required", "replacePartitionKeySubStrWith");
    }

    string ifPartitionKeyContains = extendedProperties["ifPartitionKeyContains"];
    string replacePartitionKeySubStrWith = extendedProperties["replacePartitionKeySubStrWith"];

    AzureStorageLinkedService inputLinkedService;
    AzureTableDataset sourceTable;

    // For activities working on a single dataset, the first entry is the input dataset.
    // activity.Inputs can have multiple datasets for building pipeline workflow dependencies; we can ignore the rest.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    // Create the storage client for the input using the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    long totalProcessedRecords = 0;
    long actualAffectedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = (from s in resultSegment.Results
                               where rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0 || this.IsMatch(s.RowKey, rowKeyPrefixes)
                               select s).GroupBy(a => a.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation deleteBatch = new TableBatchOperation();
            TableBatchOperation insertBatch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g.AsEnumerable())
            {
                if (!e.PartitionKey.Contains(ifPartitionKeyContains))
                {
                    continue;
                }

                // The partition key is part of the entity key, so build a new entity with the
                // transformed partition key and delete the original.
                DynamicTableEntity newEntity = new DynamicTableEntity(
                    e.PartitionKey.Replace(ifPartitionKeyContains, replacePartitionKeySubStrWith),
                    e.RowKey);
                foreach (KeyValuePair<string, EntityProperty> property in e.Properties)
                {
                    newEntity.Properties.Add(property);
                }

                insertBatch.InsertOrReplace(newEntity);
                deleteBatch.Delete(e);
                actualAffectedRecords++;
            }

            if (insertBatch.Count > 0)
            {
                tasks.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(insertBatch), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
            }

            if (deleteBatch.Count > 0)
            {
                tasks.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(deleteBatch), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
            }

            logger.Write("Updated partition: {0}", g.Key);
        }

        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
        logger.Write("Affected records count: {0}", actualAffectedRecords);
    }
    while (tableContinuationToken != null);

    Task.WaitAll(tasks.ToArray());
    logger.Write("Updated {0} records", actualAffectedRecords);

    return new Dictionary<string, string>();
}
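// RetryOnStorageTimeout, numRetriesOnTimeout, and numMsDelayOnTimeout are referenced above but are not
// shown in this listing. Note that the call sites pass an already-started Task, which cannot by itself
// be re-run; a retry helper needs a way to re-invoke the operation. A minimal sketch, assuming a
// Func<Task> factory (a call site would then read
// () => inputTable.ExecuteBatchInChunkAsync(insertBatch)) and assumed retry constants:
private const int numRetriesOnTimeout = 3;     // assumption; the real value is defined elsewhere
private const int numMsDelayOnTimeout = 1000;  // assumption; the real value is defined elsewhere

private async Task RetryOnStorageTimeout(Func<Task> operation, int numRetries, int msDelay, IActivityLogger logger)
{
    for (int attempt = 0; ; attempt++)
    {
        try
        {
            await operation();
            return;
        }
        catch (StorageException ex) when (attempt < numRetries)
        {
            // A production version would likely inspect ex.RequestInformation.HttpStatusCode and only
            // retry on timeouts (HTTP 408) rather than on every storage failure.
            logger.Write("Storage operation failed on attempt {0}: {1}; retrying in {2} ms", attempt + 1, ex.Message, msDelay);
            await Task.Delay(msDelay);
        }
    }
}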
/// <summary>
/// Copies an Azure table.
/// The table to be copied is the same as the input table from the dataset.
/// Extended properties:
///   ignore - Names of the columns (comma separated) to be ignored as part of the copy operation. This is an optional parameter.
/// Activity operation:
/// The activity iterates through all the rows from the input table,
/// checks for the columns to be ignored, removes those columns if found, and
/// runs an InsertOrReplace table operation to insert or replace the contents of the row/entity in the output table.
/// </summary>
/// <param name="linkedServices">Linked services referenced by activity definition.</param>
/// <param name="datasets">Datasets referenced by activity definition.</param>
/// <param name="activity">Activity definition.</param>
/// <param name="logger">Used to log messages during activity execution.</param>
/// <returns>Activity state at the end of execution.</returns>
public IDictionary<string, string> Execute(
    IEnumerable<LinkedService> linkedServices,
    IEnumerable<Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    IDictionary<string, string> extendedProperties = dotNetActivity.ExtendedProperties;

    string[] columnsToIgnore = null;

    // Extended properties are optional for this activity.
    if (extendedProperties != null)
    {
        logger.Write("Logging extended properties if any...");
        foreach (KeyValuePair<string, string> entry in extendedProperties)
        {
            logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
        }

        if (extendedProperties.ContainsKey("ignore"))
        {
            columnsToIgnore = extendedProperties["ignore"].Split(',');
        }
    }

    AzureStorageLinkedService inputLinkedService, outputLinkedService;
    AzureTableDataset sourceTable, destinationTable;

    // For activities working on a single dataset, the first entry is the input dataset.
    // activity.Inputs can have multiple datasets for building pipeline workflow dependencies; we can ignore the rest.
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);
    sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("input table:{0}", sourceTable.TableName);

    Dataset outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);
    destinationTable = outputDataset.Properties.TypeProperties as AzureTableDataset;
    logger.Write("output table:{0}", destinationTable.TableName);

    inputLinkedService = linkedServices.First(
        ls => ls.Name == inputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string inputConnectionString = inputLinkedService.ConnectionString;

    outputLinkedService = linkedServices.First(
        ls => ls.Name == outputDataset.Properties.LinkedServiceName).Properties.TypeProperties as AzureStorageLinkedService;
    string outputConnectionString = outputLinkedService.ConnectionString;

    // Create the storage clients for the input and output using the connection strings.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
    CloudTableClient inputTableClient = inputStorageAccount.CreateCloudTableClient();
    CloudTable inputTable = inputTableClient.GetTableReference(sourceTable.TableName);

    CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(outputConnectionString);
    CloudTableClient outputTableClient = outputStorageAccount.CreateCloudTableClient();
    CloudTable outputTable = outputTableClient.GetTableReference(destinationTable.TableName);

    if (!outputTable.Exists())
    {
        outputTable.Create();
    }

    long totalProcessedRecords = 0;
    TableContinuationToken tableContinuationToken = null;
    List<Task> tasks = new List<Task>();

    do
    {
        var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
        tableContinuationToken = resultSegment.ContinuationToken;

        var partitionGroups = (from s in resultSegment.Results
                               select s).GroupBy(a => a.PartitionKey);

        foreach (IGrouping<string, DynamicTableEntity> g in partitionGroups)
        {
            TableBatchOperation batch = new TableBatchOperation();
            foreach (DynamicTableEntity e in g.AsEnumerable())
            {
                if (columnsToIgnore != null && columnsToIgnore.Length > 0)
                {
                    foreach (string column in columnsToIgnore)
                    {
                        if (e.Properties.ContainsKey(column))
                        {
                            e.Properties.Remove(column);
                        }
                    }
                }

                batch.InsertOrReplace(e);
                logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
            }

            if (batch.Count > 0)
            {
                tasks.Add(outputTable.ExecuteBatchInChunkAsync(batch));
            }

            logger.Write("Copied data for partition: {0}", g.Key);
        }

        // In the case of a copy, the number of processed and affected records is the same.
        totalProcessedRecords += resultSegment.Results.Count;
        logger.Write("Processed records count: {0}", totalProcessedRecords);
    }
    while (tableContinuationToken != null);

    // The batch operations complete when Task.WaitAll returns.
    Task.WaitAll(tasks.ToArray());
    logger.Write("Copied {0} records from {1} to {2}", totalProcessedRecords, sourceTable.TableName, destinationTable.TableName);

    return new Dictionary<string, string>();
}
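// For context: the Execute signature used by all of these activities matches the Azure Data Factory (v1)
// IDotNetActivity contract, and the extended properties shown in the summaries are supplied from the
// pipeline definition. An illustrative activity definition for the copy activity above (all names and
// paths are placeholders, not taken from this codebase):
//
//   {
//     "name": "CopyAzureTableActivity",
//     "type": "DotNetActivity",
//     "inputs": [ { "name": "InputTableDataset" } ],
//     "outputs": [ { "name": "OutputTableDataset" } ],
//     "typeProperties": {
//       "assemblyName": "TableActivities.dll",
//       "entryPoint": "TableActivities.CopyAzureTable",
//       "packageLinkedService": "PackageStorageLinkedService",
//       "packageFile": "packages/TableActivities.zip",
//       "extendedProperties": { "ignore": "ColumnA,ColumnB" }
//     }
//   }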