Example #1
 /// <summary>
 /// Save the DataTable to an Azure table. Overwrites the table if it already exists.
 /// </summary>
 /// <param name="table">source table to save</param>
 /// <param name="account">azure storage account</param>
 /// <param name="tableName">name of azure table to write to. </param>
 /// <param name="columnTypes">parallel array to table.ColumnNames.
 /// Strong typing for the columns in the azure table. Column i is skipped if columnTypes[i] is null.
 /// ColumnTypes should be types that can be normalized to OData (string, byte, sbyte, Int16, Int32, Int64, double, single, boolean, decimal, DateTime, Guid).
 /// </param>
 /// <param name="funcComputeKeys">function to compute the partion and row keys for each row. </param>
 public static void SaveToAzureTable(this DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, PartitionRowKey> funcComputeKeys)
 {
     GenericTableWriter.SaveToAzureTable(table, account, tableName, columnTypes, funcComputeKeys);
 }
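For context, a call site for this extension method might look like the sketch below. It is illustrative only: the connection string and CSV path are placeholders, and DataTable.New.ReadCsv and row.Values are assumptions about the DataTable type used here, not APIs confirmed by these examples.

        // Usage sketch (hypothetical; connection string and file path are placeholders).
        var account = CloudStorageAccount.Parse("DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...");
        DataTable table = DataTable.New.ReadCsv(@"data.csv"); // assumed CSV loader for this DataTable type

        // One entry per column; a null entry means "skip this column".
        Type[] columnTypes = new Type[] { typeof(string), typeof(int), typeof(DateTime) };

        // Partition on the first column's value; zero-pad the row index for a unique, sortable RowKey.
        table.SaveToAzureTable(account, "mytable", columnTypes,
            (rowIndex, row) => new PartitionRowKey
            {
                PartitionKey = row.Values[0],
                RowKey = rowIndex.ToString("D8")
            });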
Example #2
        // Write a DataTable to an AzureTable.
        // DataTable's Rows are an unstructured property bag.
        // columnTypes - type of the column, or null if column should be skipped. Length of columnTypes should be the same as number of columns.
        public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, PartitionRowKey> funcComputeKeys)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (account == null)
            {
                throw new ArgumentNullException("account");
            }
            if (columnTypes == null)
            {
                throw new ArgumentNullException("columnTypes");
            }
            if (tableName == null)
            {
                throw new ArgumentNullException("tableName");
            }
            ValidateAzureTableName(tableName);

            // Azure tables have "special" columns.
            // We can skip these by setting columnTypes[i] to null, which means don't write that column.
            string[] columnNames = table.ColumnNames.ToArray();
            if (columnNames.Length != columnTypes.Length)
            {
                throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
            }

            columnTypes = columnTypes.ToArray(); // create a copy for mutation.
            for (int i = 0; i < columnNames.Length; i++)
            {
                if (IsSpecialColumnName(columnNames[i]))
                {
                    columnTypes[i] = null;
                }
            }

            if (funcComputeKeys == null)
            {
                funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
            }

            // Validate columnTypes
            string [] edmTypeNames = Array.ConvertAll(columnTypes,
                 columnType => {
                     if (columnType == null)
                     {
                         return null;
                     }
                     string edmTypeName;
                     _edmNameMapping.TryGetValue(columnType, out edmTypeName);
                     if (edmTypeName == null)
                     {
                         // Unsupported type!
                         throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
                     }
                     return edmTypeName;
                 });

            var tableClient = account.CreateCloudTableClient();
            var tableReference = tableClient.GetTableReference(tableName);

            if (tableReference.Exists())
            {
                // Note: Azure deletes tables asynchronously, so the immediate
                // re-create below can fail with a conflict until the delete completes.
                tableReference.Delete();
            }

            tableReference.Create();

            GenericTableWriter w = new GenericTableWriter
            {
                _edmTypeNames = edmTypeNames,
                _columnNames = table.ColumnNames.ToArray()
            };

            // Batch rows for performance,
            // but all rows in the batch must have the same partition key
            TableServiceContext ctx = null;
            string lastPartitionKey = null;

            HashSet<PartitionRowKey> dups = new HashSet<PartitionRowKey>();

            int rowCounter = 0;
            int batchSize = 0;
            foreach (Row row in table.Rows)
            {
                GenericWriterEntity entity = new GenericWriterEntity { _source = row };
                // Compute row and partition keys too.
                var partRow = funcComputeKeys(rowCounter, row);
                entity.PartitionKey = partRow.PartitionKey;
                entity.RowKey = partRow.RowKey;
                rowCounter++;

                // but all rows in the batch must have the same partition key
                if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
                {
                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    ctx = null;
                }

                if (ctx == null)
                {
                    dups.Clear();
                    lastPartitionKey = null;
                    ctx = tableClient.GetTableServiceContext();
                    ctx.Format.UseAtom();
                    ctx.WritingEntity += new EventHandler<ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
                    batchSize = 0;
                }

                // Add entity to the current batch.
                // Upsert means insert+Replace. But still need uniqueness within a batch.
                bool allowUpsert = true;

                // Check for dups within a batch.
                var key = new PartitionRowKey { PartitionKey = entity.PartitionKey, RowKey = entity.RowKey };
                bool dupWithinBatch = dups.Contains(key);
                dups.Add(key);

                if (allowUpsert)
                {
                    // Upsert allows overwriting existing keys. But still must be unique within a batch.
                    if (!dupWithinBatch)
                    {
                        ctx.AttachTo(tableName, entity);
                        ctx.UpdateObject(entity);
                    }
                }
                else
                {
                    // AddObject requires uniqueness.
                    if (dupWithinBatch)
                    {
                        // Azure REST APIs will give us a horrible cryptic error (400 with no message).
                        // Provide users a useful error instead.
                        throw new InvalidOperationException(string.Format("Table has duplicate keys: {0}", key));
                    }

                    ctx.AddObject(tableName, entity);
                }

                lastPartitionKey = entity.PartitionKey;
                batchSize++;

                if (batchSize % UploadBatchSize == 0)
                {
                    // Beware, if keys collide within a batch, we get a very cryptic error and 400.
                    // If they collide across batches, we get a more useful 409 (conflict).
                    try
                    {
                        ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    }
                    catch (DataServiceRequestException de)
                    {
                        var e = de.InnerException as DataServiceClientException;
                        if ((e != null) && (e.StatusCode == 409))
                        {
                            // Conflict. Duplicate keys. We don't get the specific duplicate key.
                            // Server shouldn't do this if we support upsert.
                            // (although an old emulator that doesn't yet support upsert may throw it).
                            throw new InvalidOperationException(string.Format("Table has duplicate keys. {0}", e.Message));
                        }

                        // Anything else is a genuine failure; don't swallow it.
                        throw;
                    }
                    ctx = null;
                }
            }

            if (ctx != null)
            {
                ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
            }
        }
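Example #2's duplicate detection relies on HashSet<PartitionRowKey>, which only works if PartitionRowKey compares by value. The type's definition isn't shown on this page; a minimal sketch of what the batching logic above requires (property names taken from the usage above, everything else assumed) is:

        // Minimal sketch of the key pair, assuming value equality is what lets
        // HashSet<PartitionRowKey> catch duplicates within a batch.
        public class PartitionRowKey
        {
            public string PartitionKey { get; set; }
            public string RowKey { get; set; }

            public override bool Equals(object obj)
            {
                var other = obj as PartitionRowKey;
                return (other != null) &&
                       (PartitionKey == other.PartitionKey) &&
                       (RowKey == other.RowKey);
            }

            public override int GetHashCode()
            {
                return (PartitionKey ?? string.Empty).GetHashCode() ^
                       (RowKey ?? string.Empty).GetHashCode();
            }

            public override string ToString()
            {
                // Used by the duplicate-key error messages above.
                return string.Format("({0}, {1})", PartitionKey, RowKey);
            }
        }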
Example #3
        // Write a DataTable to an AzureTable.
        // DataTable's Rows are an unstructured property bag.
        // columnTypes - type of the column, or null if column should be skipped. Length of columnTypes should be the same as number of columns.
        public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, PartitionRowKey> funcComputeKeys)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (account == null)
            {
                throw new ArgumentNullException("account");
            }
            if (columnTypes == null)
            {
                throw new ArgumentNullException("columnTypes");
            }
            if (tableName == null)
            {
                throw new ArgumentNullException("tableName");
            }
            ValidateAzureTableName(tableName);

            // Azure tables have "special" columns.
            // We can skip these by setting columnTypes[i] to null, which means don't write that column.
            string[] columnNames = table.ColumnNames.ToArray();
            if (columnNames.Length != columnTypes.Length)
            {
                throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
            }

            columnTypes = columnTypes.ToArray(); // create a copy for mutation.
            for (int i = 0; i < columnNames.Length; i++)
            {
                if (IsSpecialColumnName(columnNames[i]))
                {
                    columnTypes[i] = null;
                }
            }

            if (funcComputeKeys == null)
            {
                funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
            }

            // Validate columnTypes
            string[] edmTypeNames = Array.ConvertAll(columnTypes,
                columnType => {
                    if (columnType == null)
                    {
                        return null;
                    }
                    string edmTypeName;
                    _edmNameMapping.TryGetValue(columnType, out edmTypeName);
                    if (edmTypeName == null)
                    {
                        // Unsupported type!
                        throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
                    }
                    return edmTypeName;
                });


            CloudTableClient tableClient = account.CreateCloudTableClient();

            tableClient.DeleteTableIfExist(tableName);
            tableClient.CreateTableIfNotExist(tableName);


            GenericTableWriter w = new GenericTableWriter
            {
                _edmTypeNames = edmTypeNames,
                _columnNames  = table.ColumnNames.ToArray()
            };

            // Batch rows for performance,
            // but all rows in the batch must have the same partition key
            TableServiceContext ctx = null;
            string lastPartitionKey = null;

            int rowCounter = 0;
            int batchSize  = 0;

            foreach (Row row in table.Rows)
            {
                GenericWriterEntity entity = new GenericWriterEntity {
                    _source = row
                };
                // Compute row and partition keys too.
                var partRow = funcComputeKeys(rowCounter, row);
                entity.PartitionKey = partRow.PartitionKey;
                entity.RowKey       = partRow.RowKey;
                rowCounter++;

                // but all rows in the batch must have the same partition key
                if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
                {
                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    ctx = null;
                }

                if (ctx == null)
                {
                    lastPartitionKey = null;
                    ctx = tableClient.GetDataServiceContext();
                    ctx.WritingEntity += new EventHandler <ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
                    batchSize          = 0;
                }

                // Add entity to the current batch.
                ctx.AddObject(tableName, entity);
                lastPartitionKey = entity.PartitionKey;
                batchSize++;

                if (batchSize % 50 == 0)
                {
                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    ctx = null;
                }
            }

            if (ctx != null)
            {
                ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
            }
        }
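Every variant calls ValidateAzureTableName before touching storage. That helper isn't included on this page; a plausible sketch based on the documented Azure naming rules (3-63 alphanumeric characters, starting with a letter) would be:

        // Hypothetical sketch of the name check, following the documented Azure rules.
        private static void ValidateAzureTableName(string tableName)
        {
            if (!System.Text.RegularExpressions.Regex.IsMatch(tableName, @"^[A-Za-z][A-Za-z0-9]{2,62}$"))
            {
                throw new ArgumentException(
                    string.Format("'{0}' is not a valid azure table name", tableName), "tableName");
            }
        }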
Example #4
        // Write a DataTable to an AzureTable.
        // DataTable's Rows are an unstructured property bag.
        // columnTypes - type of the column, or null if column should be skipped. Length of columnTypes should be the same as number of columns.
        public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func <int, Row, PartitionRowKey> funcComputeKeys)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (account == null)
            {
                throw new ArgumentNullException("account");
            }
            if (columnTypes == null)
            {
                throw new ArgumentNullException("columnTypes");
            }
            if (tableName == null)
            {
                throw new ArgumentNullException("tableName");
            }
            ValidateAzureTableName(tableName);

            // Azure tables have "special" columns.
            // We can skip these by setting columnTypes[i] to null, which means don't write that column.
            string[] columnNames = table.ColumnNames.ToArray();
            if (columnNames.Length != columnTypes.Length)
            {
                throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
            }

            columnTypes = columnTypes.ToArray(); // create a copy for mutation.
            for (int i = 0; i < columnNames.Length; i++)
            {
                if (IsSpecialColumnName(columnNames[i]))
                {
                    columnTypes[i] = null;
                }
            }

            if (funcComputeKeys == null)
            {
                funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
            }

            // Validate columnTypes
            string[] edmTypeNames = Array.ConvertAll(columnTypes,
                columnType => {
                    if (columnType == null)
                    {
                        return null;
                    }
                    string edmTypeName;
                    _edmNameMapping.TryGetValue(columnType, out edmTypeName);
                    if (edmTypeName == null)
                    {
                        // Unsupported type!
                        throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
                    }
                    return edmTypeName;
                });


            CloudTableClient tableClient = account.CreateCloudTableClient();

            tableClient.DeleteTableIfExist(tableName);
            tableClient.CreateTableIfNotExist(tableName);


            GenericTableWriter w = new GenericTableWriter
            {
                _edmTypeNames = edmTypeNames,
                _columnNames  = table.ColumnNames.ToArray()
            };

            // Batch rows for performance,
            // but all rows in the batch must have the same partition key
            TableServiceContext ctx = null;
            string lastPartitionKey = null;

            HashSet <PartitionRowKey> dups = new HashSet <PartitionRowKey>();

            int rowCounter = 0;
            int batchSize  = 0;

            foreach (Row row in table.Rows)
            {
                GenericWriterEntity entity = new GenericWriterEntity {
                    _source = row
                };
                // Compute row and partition keys too.
                var partRow = funcComputeKeys(rowCounter, row);
                entity.PartitionKey = partRow.PartitionKey;
                entity.RowKey       = partRow.RowKey;
                rowCounter++;

                // but all rows in the batch must have the same partition key
                if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
                {
                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    ctx = null;
                }

                if (ctx == null)
                {
                    dups.Clear();
                    lastPartitionKey = null;
                    ctx = tableClient.GetDataServiceContext();
                    ctx.WritingEntity += new EventHandler <ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
                    batchSize          = 0;
                }

                // Add entity to the current batch.
                // Upsert means insert+Replace. But still need uniqueness within a batch.
                bool allowUpsert = true;

                // Check for dups within a batch.
                var key = new PartitionRowKey {
                    PartitionKey = entity.PartitionKey, RowKey = entity.RowKey
                };
                bool dupWithinBatch = dups.Contains(key);
                dups.Add(key);


                if (allowUpsert)
                {
                    // Upsert allows overwriting existing keys. But still must be unique within a batch.
                    if (!dupWithinBatch)
                    {
                        ctx.AttachTo(tableName, entity);
                        ctx.UpdateObject(entity);
                    }
                }
                else
                {
                    // AddObject requires uniqueness.
                    if (dupWithinBatch)
                    {
                        // Azure REST APIs will give us a horrible cryptic error (400 with no message).
                        // Provide users a useful error instead.
                        throw new InvalidOperationException(string.Format("Table has duplicate keys: {0}", key));
                    }

                    ctx.AddObject(tableName, entity);
                }


                lastPartitionKey = entity.PartitionKey;
                batchSize++;

                if (batchSize % UploadBatchSize == 0)
                {
                    // Beware, if keys collide within a batch, we get a very cryptic error and 400.
                    // If they collide across batches, we get a more useful 409 (conflict).
                    try
                    {
                        ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
                    }
                    catch (DataServiceRequestException de)
                    {
                        var e = de.InnerException as DataServiceClientException;
                        if ((e != null) && (e.StatusCode == 409))
                        {
                            // Conflict. Duplicate keys. We don't get the specific duplicate key.
                            // Server shouldn't do this if we support upsert.
                            // (although an old emulator that doesn't yet support upsert may throw it).
                            throw new InvalidOperationException(string.Format("Table has duplicate keys. {0}", e.Message));
                        }

                        // Anything else is a genuine failure; don't swallow it.
                        throw;
                    }
                    ctx = null;
                }
            }

            if (ctx != null)
            {
                ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
            }
        }
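When funcComputeKeys is null, every variant falls back to GetPartitionRowKeyFunc(columnNames). Its body isn't shown here; a minimal sketch consistent with the batching logic above (a single constant partition key so consecutive rows share a batch, and the zero-padded row index as a unique, numerically sortable RowKey) might be:

        // Hypothetical sketch of the fallback key function. columnNames is unused
        // here; the real helper may derive keys from the columns instead.
        private static Func<int, Row, PartitionRowKey> GetPartitionRowKeyFunc(string[] columnNames)
        {
            return (rowIndex, row) => new PartitionRowKey
            {
                PartitionKey = "1",              // constant: consecutive rows batch together
                RowKey = rowIndex.ToString("D8") // unique within the table, sorts numerically
            };
        }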
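Two more helpers are referenced throughout but not defined on this page. The sketches below are assumptions: IsSpecialColumnName presumably guards the reserved entity properties, and _edmNameMapping presumably maps CLR types to the OData EDM names listed in Example #1's doc comment.

        // Hypothetical sketch: the reserved properties every Azure table entity
        // already carries, which must not be written again as ordinary columns.
        private static bool IsSpecialColumnName(string columnName)
        {
            return string.Equals(columnName, "PartitionKey", StringComparison.OrdinalIgnoreCase) ||
                   string.Equals(columnName, "RowKey", StringComparison.OrdinalIgnoreCase) ||
                   string.Equals(columnName, "Timestamp", StringComparison.OrdinalIgnoreCase);
        }

        // Hypothetical sketch: CLR type -> OData EDM type name, covering the types
        // from Example #1's doc comment.
        private static readonly Dictionary<Type, string> _edmNameMapping = new Dictionary<Type, string>
        {
            { typeof(string),   "Edm.String"   },
            { typeof(byte),     "Edm.Byte"     },
            { typeof(sbyte),    "Edm.SByte"    },
            { typeof(short),    "Edm.Int16"    },
            { typeof(int),      "Edm.Int32"    },
            { typeof(long),     "Edm.Int64"    },
            { typeof(double),   "Edm.Double"   },
            { typeof(float),    "Edm.Single"   },
            { typeof(bool),     "Edm.Boolean"  },
            { typeof(decimal),  "Edm.Decimal"  },
            { typeof(DateTime), "Edm.DateTime" },
            { typeof(Guid),     "Edm.Guid"     }
        };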