/// <summary>
        /// Transforms the values of a column in an Azure table. The column may be a normal column or the RowKey column, but cannot be the PartitionKey column.
        /// The column to be transformed is specified using the following extended properties
        /// Extended Properties
        ///     columnName - Name of the column to be transformed
        ///     columnType - Data type of the column. Only supported types right now are: int32, bool, and string
        ///     ifColumnValueMatches - The transformation is applied only if the contents of column value matches the specified value.
        ///     replaceColumnValueWith - Replace the contents of the matched column value with the specified value.
        ///     ifRowKeyContains - The transformation is applied only if the contents of row key contains the specified value.
        ///     replaceRowKeySubStrWith - Replace the contents of the matched row key with the specified value to generate a new row key.
        ///     rowKeyPrefixes - Rowkey prefixes of the rows in which the column transformation will be applied. This is optional and will identify the subset of rows to do this operation.
        /// You can specify columnName,columnType,ifColumnValueMatches,replaceColumnValueWith or ifRowKeyContains,replaceRowKeySubStrWith or both as they work on different column types
        /// Extended Properties Example
        ///   "columnName": "IdentityProviderType",
        ///   "columnType": "string",
        ///   "ifColumnValueMatches": "Beihai",
        ///   "replaceColumnValueWith": "AADS2S",
        ///   "ifRowKeyContains": "Beihai",
        ///   "replaceRowKeySubStrWith": "AADS2S"
        ///  Activity Operation
        ///     The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
        ///     checks for the column, apply the column transformation if the column value match is found
        ///     checks for the row key update, apply the row key transformation if the row key match is found
        ///     runs a replace table operation in case of column transformation only
        ///     runs a delete insert operation in case of row key transformation
        /// </summary>
        /// <param name="linkedServices">Linked services referenced by activity definition.</param>
        /// <param name="datasets">Datasets referenced by activity definition.</param>
        /// <param name="activity">Activity definition.</param>
        /// <param name="logger">Used to log messages during activity execution.</param>
        /// <returns>Activity state at the end of execution</returns>
        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Microsoft.Azure.Management.DataFactories.Models.Activity activity,
            IActivityLogger logger)
        {
            DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
            IDictionary <string, string> extendedProperties = dotNetActivity.ExtendedProperties;

            logger.Write("Logging extended properties if any...");
            foreach (KeyValuePair <string, string> entry in extendedProperties)
            {
                logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
            }

            string[] rowKeyPrefixes = null;
            if (extendedProperties.ContainsKey("rowKeyPrefixes"))
            {
                rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
            }

            bool   hasColumnUpdate = false;
            string columnName = string.Empty, columnType = string.Empty, ifColumnValueMatches = string.Empty, replaceColumnValueWith = string.Empty;

            if (extendedProperties.ContainsKey("columnName"))
            {
                columnName             = extendedProperties["columnName"];
                columnType             = extendedProperties["columnType"];
                ifColumnValueMatches   = extendedProperties["ifColumnValueMatches"];
                replaceColumnValueWith = extendedProperties["replaceColumnValueWith"];
                hasColumnUpdate        = true;
            }

            bool   hasRowKeyUpdate = false;
            string ifRowKeyContains = string.Empty, replaceRowKeySubStrWith = string.Empty;

            if (extendedProperties.ContainsKey("ifRowKeyContains"))
            {
                ifRowKeyContains        = extendedProperties["ifRowKeyContains"];
                replaceRowKeySubStrWith = extendedProperties["replaceRowKeySubStrWith"];
                hasRowKeyUpdate         = true;
            }

            AzureStorageLinkedService inputLinkedService;
            AzureTableDataset         sourceTable;

            // For activities working on a single dataset, the first entry is the input dataset.
            // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies. We can ignore the rest of the datasets
            Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);

            sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("input table:{0}", sourceTable.TableName);

            inputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                 as AzureStorageLinkedService;
            string inputConnectionString = inputLinkedService.ConnectionString;

            // create storage client for input. Pass the connection string.
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
            CloudTableClient    inputTableClient    = inputStorageAccount.CreateCloudTableClient();
            CloudTable          inputTable          = inputTableClient.GetTableReference(sourceTable.TableName);

            long totalProcessedRecords = 0;
            long actualAffectedRecords = 0;
            TableContinuationToken tableContinuationToken = null;
            List <Task>            tasks = new List <Task>();

            do
            {
                var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
                tableContinuationToken = resultSegment.ContinuationToken;

                var partitionGroups = (from s in resultSegment.Results
                                       where (rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0) ? true : this.IsMatch(s.RowKey, rowKeyPrefixes)
                                       select s).GroupBy(a => a.PartitionKey);

                foreach (IGrouping <string, DynamicTableEntity> g in partitionGroups)
                {
                    TableBatchOperation batch = new TableBatchOperation();
                    foreach (DynamicTableEntity e in g.AsEnumerable())
                    {
                        string cachedRowkey = e.RowKey;
                        IDictionary <string, EntityProperty> cachedProperties = new Dictionary <string, EntityProperty>();
                        foreach (KeyValuePair <string, EntityProperty> p in e.Properties)
                        {
                            cachedProperties.Add(p);
                        }

                        bool recordUpdated = false, requiresDelete = false;
                        if (hasColumnUpdate)
                        {
                            recordUpdated = this.ReplaceIfMatch(e, columnName, columnType, ifColumnValueMatches, replaceColumnValueWith);
                        }

                        if (hasRowKeyUpdate && e.RowKey.Contains(ifRowKeyContains))
                        {
                            e.RowKey       = e.RowKey.Replace(ifRowKeyContains, replaceRowKeySubStrWith);
                            recordUpdated  = true;
                            requiresDelete = true;
                        }

                        if (recordUpdated)
                        {
                            if (!requiresDelete)
                            {
                                batch.Replace(e);
                            }
                            else
                            {
                                batch.Insert(e);
                                batch.Delete(new DynamicTableEntity(e.PartitionKey, cachedRowkey, "*", cachedProperties));
                            }

                            actualAffectedRecords++;
                            logger.Write("<partition key:{0}>, <row key:{1}> added to batch", e.PartitionKey, e.RowKey);
                        }
                    }

                    if (batch.Count > 0)
                    {
                        tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                    }

                    logger.Write("Updated partition: {0}", g.Key);
                }

                totalProcessedRecords += resultSegment.Results.Count;
                logger.Write("Processed records count: {0}", totalProcessedRecords);
                logger.Write("Affected records count: {0}", actualAffectedRecords);
            }while (tableContinuationToken != null);

            Task.WaitAll(tasks.ToArray());
            logger.Write("Updated {0} records", actualAffectedRecords);

            return(new Dictionary <string, string>());
        }
        /// <summary>
        /// Deletes a column from an Azure table.
        /// The table for the column is same as the input table from the dataset
        /// The column to be deleted is specified using the following extended properties
        /// Extended Properties
        ///     columnName - Name of the column to be deleted
        ///     rowKeyPrefix - Rowkey prefix of the row from which the column will be deleted. This is optional and will identify the subset of rows to do this operation.
        /// columnName is mandatory.
        /// Extended Properties Example
        ///     "columnName": "UseDefault",
        ///     "rowKeyPrefix": "IdentityCredentialsObject:"
        ///  Activity Operation
        ///     The activity iterates through all the rows from the input table with the matching rowKeyPrefix,
        ///     checks for the column, removes the column if found and runs a replace table operation to replace the contents of
        ///     row/entity in the table.
        /// </summary>
        /// <param name="linkedServices">Linked services referenced by activity definition.</param>
        /// <param name="datasets">Datasets referenced by activity definition.</param>
        /// <param name="activity">Activity definition.</param>
        /// <param name="logger">Used to log messages during activity execution.</param>
        /// <returns>Activity state at the end of execution</returns>
        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Activity activity,
            IActivityLogger logger)
        {
            DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
            IDictionary <string, string> extendedProperties = dotNetActivity.ExtendedProperties;

            logger.Write("Logging extended properties if any...");
            foreach (KeyValuePair <string, string> entry in extendedProperties)
            {
                logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
            }

            if (!extendedProperties.ContainsKey("columnName"))
            {
                throw new ArgumentException("Column name is required", "columnName");
            }

            string columnName = extendedProperties["columnName"];

            string rowKeyPrefix = string.Empty;

            if (extendedProperties.ContainsKey("rowKeyPrefix"))
            {
                rowKeyPrefix = extendedProperties["rowKeyPrefix"];
            }

            AzureStorageLinkedService inputLinkedService;
            AzureTableDataset         sourceTable;

            // For activities working on a single dataset, the first entry is the input dataset.
            // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies. We can ignore the rest of the datasets
            Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);

            sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("input table:{0}", sourceTable.TableName);

            inputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                 as AzureStorageLinkedService;
            string inputConnectionString = inputLinkedService.ConnectionString;

            // create storage client for input. Pass the connection string.
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
            CloudTableClient    inputTableClient    = inputStorageAccount.CreateCloudTableClient();
            CloudTable          inputTable          = inputTableClient.GetTableReference(sourceTable.TableName);

            long totalProcessedRecords = 0;
            long actualAffectedRecords = 0;
            TableContinuationToken tableContinuationToken = null;
            List <Task>            tasks = new List <Task>();

            do
            {
                var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
                tableContinuationToken = resultSegment.ContinuationToken;

                var partitionGroups = (from s in resultSegment.Results
                                       where string.IsNullOrWhiteSpace(rowKeyPrefix) ? true : s.RowKey.StartsWith(rowKeyPrefix)
                                       select s).GroupBy(a => a.PartitionKey);

                foreach (IGrouping <string, DynamicTableEntity> g in partitionGroups)
                {
                    TableBatchOperation batch = new TableBatchOperation();
                    foreach (DynamicTableEntity e in g.AsEnumerable())
                    {
                        // If the columnName exist in the properties, then Remove it
                        if (e.Properties.ContainsKey(columnName))
                        {
                            e.Properties.Remove(columnName);
                            batch.Replace(e);
                            logger.Write("<partition key:{0}>, <row key:{1}> added to batch", e.PartitionKey, e.RowKey);
                        }
                    }

                    if (batch.Count > 0)
                    {
                        tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                        actualAffectedRecords += batch.Count;
                    }
                }

                totalProcessedRecords += resultSegment.Results.Count;
                logger.Write("Processed records count: {0}", totalProcessedRecords);
                logger.Write("Affected records count: {0}", actualAffectedRecords);
            }while (tableContinuationToken != null);

            // The batch operations complete when Task.WaitAll completes
            Task.WaitAll(tasks.ToArray());
            logger.Write("Deleted column from {0} records", actualAffectedRecords);

            return(new Dictionary <string, string>());
        }
        /// <summary>
        /// Updates a column value in Apps Azure table.
        /// The column to be transformed is specified using the following extended properties
        /// Extended Properties
        ///     columnName - Name of the column to be added
        ///     columnType - Data type of the column. Only supported types right now are: int32, bool, and string
        ///     rowKeyPrefixes - Rowkey prefixes of the rows in which the column update will be applied. This is optional and will identify the subset of rows to do this operation.
        ///     partitionKeyOwnerValueRule - The updates are specified using the partition key owner and the value for it in the ; separated key-value format.
        /// Extended Properties Example
        ///   "columnName": "DisableHandleValidation",
        ///   "columnType": "bool",
        ///   "rowKeyPrefix": "ProfilesObject:"
        ///   "partitionKeyOwnerValueRule": "Beihai=true;EndToEndTests=true"
        ///  Activity Operation
        ///     The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
        ///     checks if the column is present, updates the column value if the partition key belongs to the app handles
        ///     associated with the owner specified in partitionKeyOwnerValueRule
        /// </summary>
        /// <param name="linkedServices">Linked services referenced by activity definition.</param>
        /// <param name="datasets">Datasets referenced by activity definition.</param>
        /// <param name="activity">Activity definition.</param>
        /// <param name="logger">Used to log messages during activity execution.</param>
        /// <returns>Activity state at the end of execution</returns>
        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Activity activity,
            IActivityLogger logger)
        {
            DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
            IDictionary <string, string> extendedProperties = dotNetActivity.ExtendedProperties;

            logger.Write("Logging extended properties if any...");
            foreach (KeyValuePair <string, string> entry in extendedProperties)
            {
                logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
            }

            if (!extendedProperties.ContainsKey("columnName"))
            {
                throw new ArgumentException("Column name is required", "columnName");
            }

            string columnName = extendedProperties["columnName"];

            if (!extendedProperties.ContainsKey("columnType"))
            {
                throw new ArgumentException("Column Type information is required", "columnType");
            }

            string columnType = extendedProperties["columnType"];

            // Note that partitionKeyOwnerValueRule is required as the rules for updating value comes from it
            // We do not update column value with default value if the matching rule is not found. The record is ignored. All rules need to be explicitly specified
            if (!extendedProperties.ContainsKey("partitionKeyOwnerValueRule"))
            {
                throw new ArgumentException("PartitionKeyOwnerValueRule information is required", "partitionKeyOwnerValueRule");
            }

            string partitionKeyOwnerValueRule = extendedProperties["partitionKeyOwnerValueRule"];

            string[] rowKeyPrefixes = null;
            if (extendedProperties.ContainsKey("rowKeyPrefixes"))
            {
                rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
            }

            var partitionKeyOwnerValueRuleDict = partitionKeyOwnerValueRule.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries)
                                                 .Select(part => part.Split('='))
                                                 .ToDictionary(split => split[0], split => split[1]);

            var appHandles = ownerAppHandles.Where(item => partitionKeyOwnerValueRuleDict.ContainsKey(item.Key)).SelectMany(item => item.Value).ToList();

            logger.Write("Matching appHandles:{0}", string.Join(",", appHandles));

            AzureStorageLinkedService inputLinkedService;
            AzureTableDataset         sourceTable;

            // For activities working on a single dataset, the first entry is the input dataset.
            // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies. We can ignore the rest of the datasets
            Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);

            sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("input table:{0}", sourceTable.TableName);

            inputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                 as AzureStorageLinkedService;
            string inputConnectionString = inputLinkedService.ConnectionString;

            // create storage client for input. Pass the connection string.
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
            CloudTableClient    inputTableClient    = inputStorageAccount.CreateCloudTableClient();
            CloudTable          inputTable          = inputTableClient.GetTableReference(sourceTable.TableName);

            long totalProcessedRecords = 0;
            long actualAffectedRecords = 0;
            TableContinuationToken tableContinuationToken = null;
            List <Task>            tasks = new List <Task>();

            do
            {
                var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
                tableContinuationToken = resultSegment.ContinuationToken;

                var partitionGroups = (from s in resultSegment.Results
                                       where (rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0) ? true : this.IsMatch(s.RowKey, rowKeyPrefixes)
                                       select s).GroupBy(a => a.PartitionKey);

                foreach (IGrouping <string, DynamicTableEntity> g in partitionGroups)
                {
                    TableBatchOperation batch = new TableBatchOperation();
                    foreach (DynamicTableEntity e in g.AsEnumerable())
                    {
                        // If appHandles do not contain the partition key, Continue
                        if (!appHandles.Contains(e.PartitionKey))
                        {
                            continue;
                        }
                        else
                        {
                            // Pick the value to be used for specified AppHandle
                            // This is done by getting the owber key first from e.PartitionKey
                            var ownerKey = ownerAppHandles.FirstOrDefault(x => x.Value.Contains(e.PartitionKey)).Key;

                            // The owner key is used to pick the value for the column
                            string newColumnValue = partitionKeyOwnerValueRuleDict[ownerKey];
                            if (this.ReplaceColumnValue(e, columnName, columnType, newColumnValue))
                            {
                                batch.Merge(e);
                                logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
                            }
                        }
                    }

                    if (batch.Count > 0)
                    {
                        tasks.Add(inputTable.ExecuteBatchInChunkAsync(batch));
                        actualAffectedRecords += batch.Count;
                    }

                    logger.Write("Updated partition: {0}", g.Key);
                }

                totalProcessedRecords += resultSegment.Results.Count;
                logger.Write("Processed records count: {0}", totalProcessedRecords);
                logger.Write("Affected records count: {0}", actualAffectedRecords);
            }while (tableContinuationToken != null);

            Task.WaitAll(tasks.ToArray());
            logger.Write("Updated {0} records", actualAffectedRecords);

            return(new Dictionary <string, string>());
        }
        /// <summary>
        /// Transforms Azure table partition key
        /// The partition key to be transformed is specified using the following extended properties
        /// Extended Properties
        ///     ifPartitionKeyContains - The transformation is applied only if the contents of partition key contains the specified value.
        ///     replacePartitionKeySubStrWith - Replace the contents of the matched partition key with the specified value to generate a new partition key.
        ///     rowKeyPrefixes - Rowkey prefixes of the rows in which the partition key transformation will be applied. This is optional and will identify the subset of rows to do this operation.
        /// ifPartitionKeyContains,replacePartitionKeySubStrWith are mandatory
        /// Extended Properties Example
        ///   "ifPartitionKeyContains": "Beihai",
        ///   "replacePartitionKeySubStrWith": "AADS2S"
        ///  Activity Operation
        ///     The activity iterates through all the rows from the input table with the matching rowKeyPrefixes,
        ///     checks for the partition key update, apply the partition key transformation if the partition key match is found
        ///     runs an insert operation for entities with new partition key and delete operation on existing entities with matching partition keys
        /// </summary>
        /// <param name="linkedServices">Linked services referenced by activity definition.</param>
        /// <param name="datasets">Datasets referenced by activity definition.</param>
        /// <param name="activity">Activity definition.</param>
        /// <param name="logger">Used to log messages during activity execution.</param>
        /// <returns>Activity state at the end of execution</returns>
        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Microsoft.Azure.Management.DataFactories.Models.Activity activity,
            IActivityLogger logger)
        {
            DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
            IDictionary <string, string> extendedProperties = dotNetActivity.ExtendedProperties;

            logger.Write("Logging extended properties if any...");
            foreach (KeyValuePair <string, string> entry in extendedProperties)
            {
                logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
            }

            string[] rowKeyPrefixes = null;
            if (extendedProperties.ContainsKey("rowKeyPrefixes"))
            {
                rowKeyPrefixes = extendedProperties["rowKeyPrefixes"].Split(',');
            }

            if (!extendedProperties.ContainsKey("ifPartitionKeyContains"))
            {
                throw new ArgumentException("Partition key match criteria is required", "ifPartitionKeyContains");
            }

            if (!extendedProperties.ContainsKey("replacePartitionKeySubStrWith"))
            {
                throw new ArgumentException("Partition key substring replacement value is required", "replacePartitionKeySubStrWith");
            }

            string ifPartitionKeyContains        = extendedProperties["ifPartitionKeyContains"];
            string replacePartitionKeySubStrWith = extendedProperties["replacePartitionKeySubStrWith"];

            AzureStorageLinkedService inputLinkedService;
            AzureTableDataset         sourceTable;

            // For activities working on a single dataset, the first entry is the input dataset.
            // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies. We can ignore the rest of the datasets
            Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);

            sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("input table:{0}", sourceTable.TableName);

            inputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                 as AzureStorageLinkedService;
            string inputConnectionString = inputLinkedService.ConnectionString;

            // create storage client for input. Pass the connection string.
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
            CloudTableClient    inputTableClient    = inputStorageAccount.CreateCloudTableClient();
            CloudTable          inputTable          = inputTableClient.GetTableReference(sourceTable.TableName);

            long totalProcessedRecords = 0;
            long actualAffectedRecords = 0;
            TableContinuationToken tableContinuationToken = null;
            List <Task>            tasks = new List <Task>();

            do
            {
                var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
                tableContinuationToken = resultSegment.ContinuationToken;

                var partitionGroups = (from s in resultSegment.Results
                                       where (rowKeyPrefixes == null || rowKeyPrefixes.Length <= 0) ? true : this.IsMatch(s.RowKey, rowKeyPrefixes)
                                       select s).GroupBy(a => a.PartitionKey);

                foreach (IGrouping <string, DynamicTableEntity> g in partitionGroups)
                {
                    TableBatchOperation deleteBatch = new TableBatchOperation();
                    TableBatchOperation insertBatch = new TableBatchOperation();
                    foreach (DynamicTableEntity e in g.AsEnumerable())
                    {
                        if (!e.PartitionKey.Contains(ifPartitionKeyContains))
                        {
                            continue;
                        }

                        DynamicTableEntity newEntity = new DynamicTableEntity(
                            e.PartitionKey.Replace(ifPartitionKeyContains, replacePartitionKeySubStrWith),
                            e.RowKey);
                        foreach (KeyValuePair <string, EntityProperty> property in e.Properties)
                        {
                            newEntity.Properties.Add(property);
                        }

                        insertBatch.InsertOrReplace(newEntity);
                        deleteBatch.Delete(e);
                        actualAffectedRecords++;
                    }

                    if (insertBatch.Count > 0)
                    {
                        tasks.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(insertBatch), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
                    }

                    if (deleteBatch.Count > 0)
                    {
                        tasks.Add(this.RetryOnStorageTimeout(inputTable.ExecuteBatchInChunkAsync(deleteBatch), numRetriesOnTimeout, numMsDelayOnTimeout, logger));
                    }

                    logger.Write("Updated partition: {0}", g.Key);
                }

                totalProcessedRecords += resultSegment.Results.Count;
                logger.Write("Processed records count: {0}", totalProcessedRecords);
                logger.Write("Affected records count: {0}", actualAffectedRecords);
            }while (tableContinuationToken != null);

            Task.WaitAll(tasks.ToArray());
            logger.Write("Updated {0} records", actualAffectedRecords);

            return(new Dictionary <string, string>());
        }
Example #5
0
        /// <summary>
        /// Method copies an Azure table
        /// The table to be copied is same as the input table from the dataset
        /// Extended Properties
        ///     ignore - Name of the columns (comma separated) to be ignored as part of copy operation. This is an optional paramater.
        ///  Activity Operation
        ///     The activity iterates through all the rows from the input table,
        ///     checks for the column to be ignored, remove the ignored columns is found,
        ///     runs an InsertOrReplace table operation to insert or replace the contents of
        ///     row/entity in the table.
        /// </summary>
        /// <param name="linkedServices">Linked services referenced by activity definition.</param>
        /// <param name="datasets">Datasets referenced by activity definition.</param>
        /// <param name="activity">Activity definition.</param>
        /// <param name="logger">Used to log messages during activity execution.</param>
        /// <returns>Activity state at the end of execution</returns>
        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Activity activity,
            IActivityLogger logger)
        {
            DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
            IDictionary <string, string> extendedProperties = dotNetActivity.ExtendedProperties;

            string[] columnsToIgnore = null;

            // extendedProperties are optional for this activity
            if (extendedProperties != null)
            {
                logger.Write("Logging extended properties if any...");
                foreach (KeyValuePair <string, string> entry in extendedProperties)
                {
                    logger.Write("<key:{0}> <value:{1}>", entry.Key, entry.Value);
                }

                if (extendedProperties.ContainsKey("ignore"))
                {
                    columnsToIgnore = extendedProperties["ignore"].Split(',');
                }
            }

            AzureStorageLinkedService inputLinkedService, outputLinkedService;
            AzureTableDataset         sourceTable, destinationTable;

            // For activities working on a single dataset, the first entry is the input dataset.
            // The activity.Inputs can have multiple datasets for building pipeline workflow dependencies. We can ignore the rest of the datasets
            Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.First().Name);

            sourceTable = inputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("input table:{0}", sourceTable.TableName);

            Dataset outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

            destinationTable = outputDataset.Properties.TypeProperties as AzureTableDataset;

            logger.Write("output table:{0}", destinationTable.TableName);

            inputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                 as AzureStorageLinkedService;
            string inputConnectionString = inputLinkedService.ConnectionString;

            outputLinkedService = linkedServices.First(
                ls =>
                ls.Name ==
                outputDataset.Properties.LinkedServiceName).Properties.TypeProperties
                                  as AzureStorageLinkedService;
            string outputConnectionString = outputLinkedService.ConnectionString;

            // create storage client for input. Pass the connection string.
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(inputConnectionString);
            CloudTableClient    inputTableClient    = inputStorageAccount.CreateCloudTableClient();
            CloudTable          inputTable          = inputTableClient.GetTableReference(sourceTable.TableName);

            CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(outputConnectionString);
            CloudTableClient    outputTableClient    = outputStorageAccount.CreateCloudTableClient();
            CloudTable          outputTable          = outputTableClient.GetTableReference(destinationTable.TableName);

            if (!outputTable.Exists())
            {
                outputTable.Create();
            }

            long totalProcessedRecords = 0;
            TableContinuationToken tableContinuationToken = null;
            List <Task>            tasks = new List <Task>();

            do
            {
                var resultSegment = inputTable.ExecuteQuerySegmented(new TableQuery(), tableContinuationToken);
                tableContinuationToken = resultSegment.ContinuationToken;

                var partitionGroups = (from s in resultSegment.Results
                                       select s).GroupBy(a => a.PartitionKey);

                foreach (IGrouping <string, DynamicTableEntity> g in partitionGroups)
                {
                    TableBatchOperation batch = new TableBatchOperation();
                    foreach (DynamicTableEntity e in g.AsEnumerable())
                    {
                        if (columnsToIgnore != null && columnsToIgnore.Length > 0)
                        {
                            foreach (string column in columnsToIgnore)
                            {
                                if (e.Properties.ContainsKey(column))
                                {
                                    e.Properties.Remove(column);
                                }
                            }
                        }

                        batch.InsertOrReplace(e);
                        logger.Write("<partition key:{0}>, <row key:{1}>", e.PartitionKey, e.RowKey);
                    }

                    if (batch.Count > 0)
                    {
                        tasks.Add(outputTable.ExecuteBatchInChunkAsync(batch));
                    }

                    logger.Write("Copied data for partition: {0}", g.Key);
                }

                // In case of Copy, number of processed and affected records is the same
                totalProcessedRecords += resultSegment.Results.Count;
                logger.Write("Processed records count: {0}", totalProcessedRecords);
            }while (tableContinuationToken != null);

            // The batch operations complete when Task.WaitAll completes
            Task.WaitAll(tasks.ToArray());
            logger.Write("Copied {0} records from {1} to {2}", totalProcessedRecords, sourceTable.TableName, destinationTable.TableName);

            return(new Dictionary <string, string>());
        }