public override bool Equals(object obj)
{
    PartitionRowKey other = obj as PartitionRowKey;
    if (other == null)
    {
        return false;
    }
    return (this.PartitionKey == other.PartitionKey) && (this.RowKey == other.RowKey);
}
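// Because PartitionRowKey is used as a HashSet<> key below for duplicate detection, the
// Equals override above needs a matching GetHashCode override. The original class presumably
// provides one; this is a minimal sketch, assuming PartitionKey and RowKey are string
// properties (the null-coalescing guards are an assumption, not taken from the source).
public override int GetHashCode()
{
    // Combine both key parts so that equal keys produce equal hash codes.
    return (this.PartitionKey ?? string.Empty).GetHashCode() ^
           (this.RowKey ?? string.Empty).GetHashCode();
}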
// Write a DataTable to an Azure Table.
// DataTable's Rows are an unstructured property bag.
// columnTypes - type of each column, or null if the column should be skipped.
// The length of columnTypes must match the number of columns.
public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, PartitionRowKey> funcComputeKeys)
{
    if (table == null)
    {
        throw new ArgumentNullException("table");
    }
    if (account == null)
    {
        throw new ArgumentNullException("account");
    }
    if (columnTypes == null)
    {
        throw new ArgumentNullException("columnTypes");
    }
    if (tableName == null)
    {
        throw new ArgumentNullException("tableName");
    }
    ValidateAzureTableName(tableName);

    // Azure tables have "special" columns.
    // We can skip these by setting columnTypes[i] to null, which means don't write that column.
    string[] columnNames = table.ColumnNames.ToArray();
    if (columnNames.Length != columnTypes.Length)
    {
        throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
    }

    columnTypes = columnTypes.ToArray(); // create a copy for mutation.
    for (int i = 0; i < columnNames.Length; i++)
    {
        if (IsSpecialColumnName(columnNames[i]))
        {
            columnTypes[i] = null;
        }
    }

    if (funcComputeKeys == null)
    {
        funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
    }

    // Validate columnTypes.
    string[] edmTypeNames = Array.ConvertAll(columnTypes, columnType =>
    {
        if (columnType == null)
        {
            return null;
        }
        string edmTypeName;
        _edmNameMapping.TryGetValue(columnType, out edmTypeName);
        if (edmTypeName == null)
        {
            // Unsupported type!
            throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
        }
        return edmTypeName;
    });

    var tableClient = account.CreateCloudTableClient();
    var tableReference = tableClient.GetTableReference(tableName);
    if (tableReference.Exists())
    {
        tableReference.Delete();
    }
    tableReference.Create();

    GenericTableWriter w = new GenericTableWriter
    {
        _edmTypeNames = edmTypeNames,
        _columnNames = table.ColumnNames.ToArray()
    };

    // Batch rows for performance,
    // but all rows in a batch must have the same partition key.
    TableServiceContext ctx = null;
    string lastPartitionKey = null;

    HashSet<PartitionRowKey> dups = new HashSet<PartitionRowKey>();

    int rowCounter = 0;
    int batchSize = 0;
    foreach (Row row in table.Rows)
    {
        GenericWriterEntity entity = new GenericWriterEntity { _source = row };

        // Compute row and partition keys too.
        var partRow = funcComputeKeys(rowCounter, row);
        entity.PartitionKey = partRow.PartitionKey;
        entity.RowKey = partRow.RowKey;
        rowCounter++;

        // All rows in a batch must have the same partition key,
        // so flush the current batch when the partition key changes.
        if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
        {
            ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
            ctx = null;
        }

        if (ctx == null)
        {
            dups.Clear();
            lastPartitionKey = null;
            ctx = tableClient.GetTableServiceContext();
            ctx.Format.UseAtom();
            ctx.WritingEntity += new EventHandler<ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
            batchSize = 0;
        }

        // Add the entity to the current batch.
        // Upsert means insert + replace, but keys still need to be unique within a batch.
        bool allowUpsert = true;

        // Check for dups within a batch.
        var key = new PartitionRowKey { PartitionKey = entity.PartitionKey, RowKey = entity.RowKey };
        bool dupWithinBatch = dups.Contains(key);
        dups.Add(key);

        if (allowUpsert)
        {
            // Upsert allows overwriting existing keys, but keys must still be unique within a batch.
            if (!dupWithinBatch)
            {
                ctx.AttachTo(tableName, entity);
                ctx.UpdateObject(entity);
            }
        }
        else
        {
            // AddObject requires uniqueness.
            if (dupWithinBatch)
            {
                // Azure REST APIs will give us a horrible cryptic error (400 with no message).
                // Provide users a useful error instead.
                throw new InvalidOperationException(string.Format("Table has duplicate keys: {0}", key));
            }
            ctx.AddObject(tableName, entity);
        }

        lastPartitionKey = entity.PartitionKey;
        batchSize++;

        if (batchSize % UploadBatchSize == 0)
        {
            // Beware: if keys collide within a batch, we get a very cryptic error and a 400.
            // If they collide across batches, we get a more useful 409 (conflict).
            try
            {
                ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
            }
            catch (DataServiceRequestException de)
            {
                var e = de.InnerException as DataServiceClientException;
                if (e != null && e.StatusCode == 409)
                {
                    // Conflict. Duplicate keys. We don't get the specific duplicate key.
                    // Server shouldn't do this if we support upsert
                    // (although an old emulator that doesn't yet support upsert may throw it).
                    throw new InvalidOperationException(string.Format("Table has duplicate keys. {0}", e.Message));
                }
                // Not a duplicate-key conflict; rethrow rather than silently swallowing the failure.
                throw;
            }
            ctx = null;
        }
    }

    if (ctx != null)
    {
        ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
    }
}
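// A hedged usage sketch (not from the source) showing one way SaveToAzureTable might be called.
// The connection string, table name, and column layout are hypothetical, and the DataTable
// instance is assumed to have been loaded elsewhere with exactly two columns. The key function
// here puts every row in a single partition and uses the zero-padded row index as the RowKey
// so that lexicographic ordering matches row order.
public static void SaveExample(DataTable table)
{
    CloudStorageAccount account =
        CloudStorageAccount.Parse("UseDevelopmentStorage=true"); // hypothetical dev-storage account

    // One entry per column; use null to skip a column.
    Type[] columnTypes = new Type[] { typeof(string), typeof(int) };

    Func<int, Row, PartitionRowKey> computeKeys =
        (rowIndex, row) => new PartitionRowKey
        {
            PartitionKey = "0",
            RowKey = rowIndex.ToString("D8")
        };

    SaveToAzureTable(table, account, "mytable", columnTypes, computeKeys);
}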