/// <summary>
/// Lists a single segment of blobs by forwarding every argument, unchanged and in the
/// same order, to the wrapped <c>_client</c>.
/// </summary>
/// <param name="prefix">Forwarded to the wrapped client.</param>
/// <param name="useFlatListing">Forwarded to the wrapped client.</param>
/// <param name="blobListingDetails">Forwarded to the wrapped client.</param>
/// <param name="maxResults">Forwarded to the wrapped client; may be <c>null</c>.</param>
/// <param name="continuationToken">Forwarded to the wrapped client.</param>
/// <param name="blobRequestOptions">Forwarded to the wrapped client.</param>
/// <param name="operationContext">Forwarded to the wrapped client.</param>
/// <returns>The segment returned by the wrapped client.</returns>
public BlobResultSegment ListBlobsSegmented(string prefix, bool useFlatListing, BlobListingDetails blobListingDetails, int? maxResults, BlobContinuationToken continuationToken, BlobRequestOptions blobRequestOptions, OperationContext operationContext)
{
    BlobResultSegment segment = _client.ListBlobsSegmented(
        prefix,
        useFlatListing,
        blobListingDetails,
        maxResults,
        continuationToken,
        blobRequestOptions,
        operationContext);

    return segment;
}
/// <summary>
/// Entry point of the custom activity. Resolves the input dataset and its Azure Storage
/// linked service, then pages through the blobs under the dataset's folder and hands each
/// page to <c>DeleteBlob</c>.
/// </summary>
/// <param name="linkedServices">Linked services available to the activity; each name is logged.</param>
/// <param name="datasets">Datasets available to the activity; the one named by the first activity input is used.</param>
/// <param name="activity">The activity definition, used to find the input dataset name.</param>
/// <param name="logger">Logger that receives the linked-service names and is passed on to <c>DeleteBlob</c>.</param>
/// <returns>Always a new, empty dictionary.</returns>
public IDictionary <string, string> Execute(
    IEnumerable <LinkedService> linkedServices,
    IEnumerable <Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // The input dataset is the one whose name matches the first activity input.
    Dataset sourceDataset = datasets.Single(candidate => candidate.Name == activity.Inputs.First().Name);

    foreach (LinkedService service in linkedServices)
    {
        logger.Write("linkedService.Name {0}", service.Name);
    }

    // First (not Single): the same Azure Storage linked service may be listed for both
    // input and output.
    LinkedService matchingService = linkedServices.First(
        candidate => candidate.Name == sourceDataset.Properties.LinkedServiceName);
    AzureStorageLinkedService storageService = matchingService.Properties.TypeProperties as AzureStorageLinkedService;

    string storageConnectionString = storageService.ConnectionString;
    string sourceFolder = GetFolderPath(sourceDataset);

    // Storage client for the input account.
    CloudBlobClient blobClient = CloudStorageAccount.Parse(storageConnectionString).CreateCloudBlobClient();

    // The token is handed to DeleteBlob by reference; the loop ends once it is null after a call.
    BlobContinuationToken nextPage = null;
    string lastResult = string.Empty; // result of the most recent DeleteBlob call; not read afterwards

    while (true)
    {
        BlobResultSegment page = blobClient.ListBlobsSegmented(
            sourceFolder,
            true,
            BlobListingDetails.Metadata,
            null,
            nextPage,
            null,
            null);

        lastResult = DeleteBlob(page, logger, sourceFolder, ref nextPage, "Microsoft");

        if (nextPage == null)
        {
            break;
        }
    }

    // The returned dictionary is intended for chaining custom activities; nothing is
    // populated here, so an empty one is returned.
    return(new Dictionary <string, string>());
}
/// <summary>
/// Converts unprocessed XML/CSV block blobs found under each input table's folder to JSON
/// and writes one JSON blob per input blob into every output table's folder.
/// </summary>
/// <remarks>
/// Fixes over the previous revision:
/// the listing now follows the continuation token, so all segments are processed;
/// the output-table guard and log use the output folder instead of the input folder;
/// metadata is sent with the upload instead of calling <c>SetMetadata</c> on a blob that
/// does not exist yet; missing "FileType"/"IsProcessed" metadata no longer aborts the run;
/// the catch block logs the full exception rather than only the stack trace.
/// </remarks>
/// <param name="filesToProcessTables">Input tables whose folders are scanned for blobs.</param>
/// <param name="transformedFilesTables">Output tables that receive the generated JSON blobs.</param>
/// <param name="filesProperties">Not used by this method.</param>
/// <param name="logger">Receives progress and error messages.</param>
/// <returns>Always a new, empty dictionary. Exceptions are logged, not rethrown.</returns>
public IDictionary <string, string> Execute(IEnumerable <ResolvedTable> filesToProcessTables, IEnumerable <ResolvedTable> transformedFilesTables, IDictionary <string, string> filesProperties, IActivityLogger logger)
{
    try
    {
        foreach (ResolvedTable filetoProcessTable in filesToProcessTables)
        {
            string storageConnectionString = GetConnectionString(filetoProcessTable.LinkedService);
            string folderPath = GetFolderPath(filetoProcessTable.Table);

            if (String.IsNullOrEmpty(storageConnectionString) || String.IsNullOrEmpty(folderPath))
            {
                continue;
            }

            logger.Write(TraceEventType.Information, "Reading file from: {0}", folderPath);

            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(storageConnectionString);
            CloudBlobClient inputClient = inputStorageAccount.CreateCloudBlobClient();

            BlobContinuationToken continuationToken = null;

            do
            {
                BlobResultSegment result = inputClient.ListBlobsSegmented(folderPath, true, BlobListingDetails.All, null, continuationToken, null, null);

                foreach (IListBlobItem listBlobItem in result.Results)
                {
                    CloudBlockBlob inputBlob = listBlobItem as CloudBlockBlob;
                    if (inputBlob == null)
                    {
                        continue;
                    }

                    // A blob counts as processed only when its "IsProcessed" metadata parses to true.
                    string isProcessedText;
                    bool blobIsProcessed;
                    if (!inputBlob.Metadata.TryGetValue("IsProcessed", out isProcessedText) ||
                        !bool.TryParse(isProcessedText, out blobIsProcessed))
                    {
                        blobIsProcessed = false;
                    }

                    if (blobIsProcessed)
                    {
                        continue;
                    }

                    // Without a "FileType" entry the converter cannot be chosen; skip this blob
                    // instead of aborting the whole run on a missing key.
                    string blobType;
                    if (!inputBlob.Metadata.TryGetValue("FileType", out blobType) || blobType == null)
                    {
                        logger.Write(TraceEventType.Warning, "Skipping file {0}: no FileType metadata", inputBlob.Name);
                        continue;
                    }

                    // Read the blob line by line. Line terminators are dropped, as before.
                    System.Text.StringBuilder contentBuilder = new System.Text.StringBuilder();
                    using (StreamReader sr = new StreamReader(inputBlob.OpenRead()))
                    {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            contentBuilder.Append(line);
                        }
                    }

                    string inputBlobString = contentBuilder.ToString();

                    // Stays empty for file types other than XML/CSV (unchanged behavior).
                    string JSONOutputString = "";

                    // Call process xml
                    if (string.Equals(blobType, "xml", StringComparison.OrdinalIgnoreCase))
                    {
                        logger.Write(TraceEventType.Information, "Identified file {0} as XML", inputBlob.Name);
                        JSONOutputString = GetJSONFromXML(inputBlobString, inputBlob.Metadata);
                        logger.Write(TraceEventType.Information, "Processed file{0} XML to JSON", inputBlob.Name);
                    }

                    // Call process csv
                    if (string.Equals(blobType, "csv", StringComparison.OrdinalIgnoreCase))
                    {
                        logger.Write(TraceEventType.Information, "Identified file {0} as CSV", inputBlob.Name);
                        JSONOutputString = GetJSONFromCSV(inputBlobString, inputBlob.Metadata);
                        logger.Write(TraceEventType.Information, "Processed file{0} CSV to JSON", inputBlob.Name);
                    }

                    // Create JSON file in every output location
                    foreach (ResolvedTable transformedFilesTable in transformedFilesTables)
                    {
                        string connectionString = GetConnectionString(transformedFilesTable.LinkedService);
                        string outputfolderPath = GetFolderPath(transformedFilesTable.Table);

                        // Validate the OUTPUT folder here (the input folder was validated above).
                        if (String.IsNullOrEmpty(connectionString) || String.IsNullOrEmpty(outputfolderPath))
                        {
                            continue;
                        }

                        logger.Write(TraceEventType.Information, "Writing blob to: {0}", outputfolderPath);

                        string blobName = Guid.NewGuid().ToString();
                        CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(connectionString);
                        Uri outputBlobUri = new Uri(outputStorageAccount.BlobEndpoint, outputfolderPath + "/" + blobName + ".json");
                        CloudBlockBlob outputBlob = new CloudBlockBlob(outputBlobUri, outputStorageAccount.Credentials);

                        // Copying metadata as-is. It is sent together with the upload below;
                        // SetMetadata cannot be used first because the blob does not exist yet.
                        foreach (var metadata in inputBlob.Metadata)
                        {
                            outputBlob.Metadata.Add(metadata);
                        }

                        outputBlob.UploadText(JSONOutputString);
                    }
                }

                // Advance to the next listing segment; null once everything has been listed.
                continuationToken = result.ContinuationToken;
            } while (continuationToken != null);
        }
    }
    catch (Exception ex)
    {
        // ToString() carries the exception type and message as well as the stack trace.
        logger.Write(TraceEventType.Critical, "Fatal error details: {0}", ex.ToString());
    }

    return(new Dictionary <string, string>());
}
/// <summary>
/// For every input table, counts the lines of each block blob under the table's folder and
/// posts one summary line per blob to the URL given by the "requestBinUrl" property.
/// </summary>
/// <remarks>
/// Listed items that are not block blobs are skipped. Previously such an item caused a
/// <see cref="NullReferenceException"/> when its name was read, which aborted the whole run.
/// </remarks>
/// <param name="inputTables">Tables whose folders are scanned.</param>
/// <param name="outputTables">Not used by this method.</param>
/// <param name="properties">Must contain "requestBinUrl" and "sliceStart".</param>
/// <param name="logger">Receives progress and error messages; also stored in the <c>logger</c> field.</param>
/// <returns>Always a new, empty dictionary. Exceptions are logged, not rethrown.</returns>
public IDictionary <string, string> Execute(IEnumerable <ResolvedTable> inputTables, IEnumerable <ResolvedTable> outputTables, IDictionary <string, string> properties, IActivityLogger logger)
{
    this.logger = logger;

    try
    {
        var requestBin = properties["requestBinUrl"];
        logger.Write(TraceEventType.Information, "RequestBinURL {0}", requestBin);

        // NOTE(review): the parsed slice start is not used below. The lookup and the call are
        // kept because either may throw on bad configuration - confirm whether that is intended.
        var sliceStartTime = properties["sliceStart"];
        ParseSliceStartTime(sliceStartTime);

        using (HttpClient client = new HttpClient())
        {
            foreach (var table in inputTables)
            {
                var connectionString = table.LinkedService.GetConnectionString();
                var folder = table.Table.GetFolderPath();

                if (folder == null || connectionString == null)
                {
                    continue;
                }

                BlobContinuationToken continuationToken = null;
                CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(connectionString);
                CloudBlobClient inputClient = inputStorageAccount.CreateCloudBlobClient();

                // One CSV-style line per blob: folder, blob name, line count, machine, UTC time.
                System.Text.StringBuilder output = new System.Text.StringBuilder();

                do
                {
                    BlobResultSegment result = inputClient.ListBlobsSegmented(folder, true, BlobListingDetails.Metadata, null, continuationToken, null, null);

                    foreach (IListBlobItem listBlobItem in result.Results)
                    {
                        CloudBlockBlob inputBlob = listBlobItem as CloudBlockBlob;
                        if (inputBlob == null)
                        {
                            // Not a block blob: nothing to read and no name to report.
                            continue;
                        }

                        int count = 0;
                        using (StreamReader sr = new StreamReader(inputBlob.OpenRead()))
                        {
                            while (!sr.EndOfStream)
                            {
                                string line = sr.ReadLine();
                                if (count == 0)
                                {
                                    logger.Write(TraceEventType.Information, "First line: [{0}]", line);
                                }

                                count++;
                            }
                        }

                        output.AppendFormat(CultureInfo.InvariantCulture, "{0},{1},{2},{3},{4}\n", folder, inputBlob.Name, count, Environment.MachineName, DateTime.UtcNow);
                    }

                    continuationToken = result.ContinuationToken;
                } while (continuationToken != null);

                // The method is synchronous, so block until the POST has completed.
                var task = client.PostAsync(requestBin, new StringContent(output.ToString()));
                Task.WaitAll(task);
            }
        }
    }
    catch (Exception ex)
    {
        // Pass the text as an argument, not as the format string: exception text may contain braces.
        this.logger.Write(TraceEventType.Error, "{0}", ex.ToString());
    }

    return(new Dictionary <string, string>());
}
/// <summary>
/// Execute method is the only method of IDotNetActivity interface you must implement.
/// In this sample, the method invokes the Calculate method on every listing segment of the
/// input folder and uploads the combined result as a single output blob.
/// </summary>
/// <remarks>
/// The results of all listing segments are concatenated. Previously each segment's result
/// overwrote the one before it, so only the last segment reached the output blob.
/// </remarks>
/// <param name="linkedServices">Linked services available to the activity; each name is logged.</param>
/// <param name="datasets">Datasets available to the activity; input and output are found by name.</param>
/// <param name="activity">Activity definition; must have exactly one input and one output.</param>
/// <param name="logger">Receives progress messages and is passed on to Calculate.</param>
/// <returns>Always a new, empty dictionary.</returns>
/// <exception cref="InvalidOperationException">
/// The input dataset's linked service is not an Azure Storage linked service, or a
/// Single/First lookup does not find the element it expects.
/// </exception>
public IDictionary <string, string> Execute(
    IEnumerable <LinkedService> linkedServices,
    IEnumerable <Dataset> datasets,
    Activity activity,
    IActivityLogger logger)
{
    // get extended properties defined in activity JSON definition
    // (for example: SliceStart)
    // NOTE(review): the value is not used below; the lookup is kept because it throws when
    // "SliceStart" is missing - confirm whether that validation is intended.
    DotNetActivity dotNetActivity = (DotNetActivity)activity.TypeProperties;
    string sliceStartString = dotNetActivity.ExtendedProperties["SliceStart"];

    // get the input dataset
    Dataset inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.Single().Name);

    // log linked services passed in linkedServices parameter
    foreach (LinkedService ls in linkedServices)
    {
        logger.Write("linkedService.Name {0}", ls.Name);
    }

    // get the first Azure Storage linked service from linkedServices object
    // using First method instead of Single since we are using the same
    // Azure Storage linked service for input and output.
    AzureStorageLinkedService inputLinkedService = linkedServices.First(
        linkedService =>
        linkedService.Name ==
        inputDataset.Properties.LinkedServiceName).Properties.TypeProperties
        as AzureStorageLinkedService;

    if (inputLinkedService == null)
    {
        // Fail with a clear message instead of a NullReferenceException on the next line.
        throw new InvalidOperationException(
            "Linked service '" + inputDataset.Properties.LinkedServiceName + "' is not an Azure Storage linked service.");
    }

    // get the connection string in the linked service
    string connectionString = inputLinkedService.ConnectionString;

    // get the folder path from the input dataset definition
    string folderPath = GetFolderPath(inputDataset);

    // create storage client for input. Pass the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(connectionString);
    CloudBlobClient inputClient = inputStorageAccount.CreateCloudBlobClient();

    // Collects the Calculate result of every listing segment.
    System.Text.StringBuilder outputBuilder = new System.Text.StringBuilder();

    // initialize the continuation token before using it in the do-while loop.
    BlobContinuationToken continuationToken = null;
    do
    {
        // get the list of input blobs from the input storage client object.
        BlobResultSegment blobList = inputClient.ListBlobsSegmented(folderPath,
                                                                    true,
                                                                    BlobListingDetails.Metadata,
                                                                    null,
                                                                    continuationToken,
                                                                    null,
                                                                    null);

        // Calculate method performs the core logic; it receives the token by reference.
        outputBuilder.Append(Calculate(blobList, logger, folderPath, ref continuationToken));
    } while (continuationToken != null);

    string output = outputBuilder.ToString();

    // get the output dataset using the name of the dataset matched to a name in the Activity output collection.
    Dataset outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

    // get the folder path from the output dataset definition
    folderPath = GetFolderPath(outputDataset);

    // log the output folder path
    logger.Write("Writing blob to the folder: {0}", folderPath);

    // create a storage object for the output blob.
    // in this example, same storage is used for both input/output
    CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(connectionString);

    // write the name of the file.
    Uri outputBlobUri = new Uri(outputStorageAccount.BlobEndpoint, folderPath + "/" + GetFileName(outputDataset));

    // log the output file name
    logger.Write("output blob URI: {0}", outputBlobUri.ToString());

    // create a blob and upload the output text.
    CloudBlockBlob outputBlob = new CloudBlockBlob(outputBlobUri, outputStorageAccount.Credentials);
    logger.Write("Writing {0} to the output blob", output);
    outputBlob.UploadText(output);

    // The dictionary can be used to chain custom activities together in the future.
    // This feature is not implemented yet, so just return an empty dictionary.
    return(new Dictionary <string, string>());
}
/// <summary>
/// This sample executable is to demonstrate how custom DLL you wrote for the ADFv1 .NET Custom Activity
/// can be rewritten to a custom executable file to be executed by ADFv2 Custom Activity.
/// </summary>
/// <remarks>
/// Reads "activity.json" and "linkedServices.json" from the working directory, runs Calculate
/// over every listing segment of the input folder and uploads the combined result to the
/// configured output blob. Previously each segment's result overwrote the one before it,
/// so only the last segment reached the output blob.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WriteLine("Start to execute custom activity V2");

    // Parse activity and reference objects info from input files
    dynamic activity = JsonConvert.DeserializeObject(File.ReadAllText("activity.json"));
    dynamic linkedServices = JsonConvert.DeserializeObject(File.ReadAllText("linkedServices.json"));

    // Extract Connection String from LinkedService
    dynamic storageLinkedService = ((JArray)linkedServices).First(service => "BatchStorageLinkedService".Equals(((dynamic)service).name.ToString()));
    string connectionString = storageLinkedService.properties.typeProperties.connectionString.value;

    // Extract InputFilePath & OutputFilePath from ExtendedProperties
    // In ADFv2, Input & Output Datasets are not required for Custom Activity. In this sample the folderName and
    // fileName properties are stored in ExtendedProperty of the Custom Activity like below. You are not required
    // to get the information from Datasets.

    //"extendedProperties": {
    //    "InputFolderPath": "batchjobs/filestocheck",
    //    "OutputFilePath": "batchjobs/filestocheck/outputfile.txt"
    // }
    string inputFolderPath = activity.typeProperties.extendedProperties.InputFolderPath;
    string outputFilePath = activity.typeProperties.extendedProperties.OutputFilePath;

    //V1 Logger is no longer required as your executable can directly write to STDOUT
    Console.WriteLine("InputFilePath: {0}, OutputFilePath: {1}", inputFolderPath, outputFilePath);

    // Extract Input & Output Dataset
    // If you would like to continue using Datasets, pass the Datasets in referenceObjects of the Custom Activity JSON payload like below:

    //"referenceObjects": {
    //    "linkedServices": [
    //        {
    //            "referenceName": "BatchStorageLinkedService",
    //            "type": "LinkedServiceReference"
    //        }
    //    ],
    //    "datasets": [
    //        {
    //            "referenceName": "InputDataset",
    //            "type": "DatasetReference"
    //        },
    //        {
    //            "referenceName": "OutputDataset",
    //            "type": "DatasetReference"
    //        }
    //    ]
    // }

    // Then you can use following code to get the folder and file info instead:

    //dynamic datasets = JsonConvert.DeserializeObject(File.ReadAllText("datasets.json"));
    //dynamic inputDataset = ((JArray)datasets).First(_ => ((dynamic)_).name.ToString().StartsWith("InputDataset"));
    //dynamic outputDataset = ((JArray)datasets).First(_ => ((dynamic)_).name.ToString().StartsWith("OutputDataset"));
    //string inputFolderPath = inputDataset.properties.typeProperties.folderPath;
    //string outputFolderPath = outputDataset.properties.typeProperties.folderPath;
    //string outputFile = outputDataset.properties.typeProperties.fileName;
    //string outputFilePath = outputFolderPath + "/" + outputFile;

    //Once needed info is prepared, core business logic down below remains the same.

    // create storage client for input. Pass the connection string.
    CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(connectionString);
    CloudBlobClient inputClient = inputStorageAccount.CreateCloudBlobClient();

    // Collects the Calculate result of every listing segment.
    System.Text.StringBuilder outputBuilder = new System.Text.StringBuilder();

    // initialize the continuation token before using it in the do-while loop.
    BlobContinuationToken continuationToken = null;
    do
    {
        // get the list of input blobs from the input storage client object.
        BlobResultSegment blobList = inputClient.ListBlobsSegmented(inputFolderPath,
                                                                    true,
                                                                    BlobListingDetails.Metadata,
                                                                    null,
                                                                    continuationToken,
                                                                    null,
                                                                    null);

        // Calculate method returns the number of occurrences of
        // the search term ("Microsoft") in each blob associated
        // with the data slice. It receives the continuation token by reference.
        outputBuilder.Append(Calculate(blobList, inputFolderPath, ref continuationToken, "Microsoft"));
    } while (continuationToken != null);

    string output = outputBuilder.ToString();

    CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(connectionString);

    // write the name of the file.
    Uri outputBlobUri = new Uri(outputStorageAccount.BlobEndpoint, outputFilePath);

    // log the output file name
    Console.WriteLine("output blob URI: {0}", outputBlobUri.ToString());

    // create a blob and upload the output text.
    CloudBlockBlob outputBlob = new CloudBlockBlob(outputBlobUri, outputStorageAccount.Credentials);
    Console.WriteLine("Writing {0} to the output blob", output);
    outputBlob.UploadText(output);
}
/// <summary>
/// Calls https://datamarket.azure.com/dataset/aml_labs/lexicon_based_sentiment_analysis to calculate a sentiment for a twitter tweet.
/// Register at the site to get an apikey.
/// </summary>
/// <remarks>
/// Every block blob under each input table's folder is passed, once per output table, to
/// <c>ProcessTweetBlob</c> together with that table's Azure table (created if missing).
/// Fixes over the previous revision: the e-mail is read from the "email" property instead
/// of a second copy of "apikey", and the API key is no longer written to the log.
/// </remarks>
/// <param name="inputTables">Tables whose folders are scanned for tweet blobs.</param>
/// <param name="outputTables">Tables whose Azure tables receive the processed tweets.</param>
/// <param name="properties">Must contain "url" and "apikey"; "email" is read when present.</param>
/// <param name="logger">Receives progress and error messages; also stored in the <c>logger</c> field.</param>
/// <returns>Always a new, empty dictionary. Exceptions are logged, not rethrown.</returns>
public IDictionary <string, string> Execute(IEnumerable <ResolvedTable> inputTables, IEnumerable <ResolvedTable> outputTables, IDictionary <string, string> properties, IActivityLogger logger)
{
    this.logger = logger;

    try
    {
        url = properties["url"];
        logger.Write(TraceEventType.Information, "url {0}", url);

        apikey = properties["apikey"];
        // The key is a secret: record that it was supplied, never its value.
        logger.Write(TraceEventType.Information, "apikey {0}", "<redacted>");

        // Previously the e-mail was read from "apikey" (copy-paste slip). Prefer the "email"
        // property; fall back to the old value so configurations without it keep working.
        string configuredEmail;
        if (properties.TryGetValue("email", out configuredEmail))
        {
            email = configuredEmail;
            logger.Write(TraceEventType.Information, "email {0}", email);
        }
        else
        {
            email = apikey;
            logger.Write(TraceEventType.Warning, "{0}", "email property not found; using the apikey value as before");
        }

        foreach (var table in inputTables)
        {
            var connectionString = table.LinkedService.GetConnectionString();
            var folder = table.Table.GetFolderPath();

            if (folder == null || connectionString == null)
            {
                continue;
            }

            BlobContinuationToken continuationToken = null;
            CloudStorageAccount inputStorageAccount = CloudStorageAccount.Parse(connectionString);
            CloudBlobClient inputClient = inputStorageAccount.CreateCloudBlobClient();

            do
            {
                BlobResultSegment result = inputClient.ListBlobsSegmented(folder, true, BlobListingDetails.Metadata, null, continuationToken, null, null);

                foreach (IListBlobItem listBlobItem in result.Results)
                {
                    CloudBlockBlob inputBlob = listBlobItem as CloudBlockBlob;
                    if (inputBlob == null)
                    {
                        continue;
                    }

                    foreach (var outputtable in outputTables)
                    {
                        var outputstorageaccount = CloudStorageAccount.Parse(outputtable.LinkedService.GetConnectionString());
                        var tableName = outputtable.Table.GetTableName();
                        var tableClient = outputstorageaccount.CreateCloudTableClient();
                        var outputAzureTable = tableClient.GetTableReference(tableName);
                        outputAzureTable.CreateIfNotExists();

                        ProcessTweetBlob(inputBlob, outputAzureTable, folder);
                    }
                }

                continuationToken = result.ContinuationToken;
            } while (continuationToken != null);
        }
    }
    catch (Exception ex)
    {
        // Pass the text as an argument, not as the format string: exception text may contain braces.
        this.logger.Write(TraceEventType.Error, "{0}", ex.ToString());
    }

    return(new Dictionary <string, string>());
}
/// <summary>
/// Deletes blobs that <c>ShouldDeleteBlob</c> selects for the cutoff time (now minus
/// <c>blobDeletionAge</c>) from the part of the container identified by this node's prefix.
/// </summary>
/// <remarks>
/// A failure while listing is reported and ends the method; a failure while deleting one
/// blob is reported and the remaining blobs are still attempted. Deletion stops early once
/// <c>stopping</c> is set.
/// </remarks>
/// <param name="blobClient">Client used to list the blobs; also supplies the path delimiter.</param>
/// <param name="containerName">Container name; it is the first component of the listing prefix.</param>
private void DeleteOldBlobsFromContainer(CloudBlobClient blobClient, string containerName)
{
    // Timestamp handed to ShouldDeleteBlob for every listed blob.
    DateTime deleteBefore = DateTime.UtcNow.Add(-this.blobDeletionAge);
    this.traceSource.WriteInfo(
        this.logSourceId,
        "Deleting blobs older than {0}.",
        deleteBefore);

    // Build the listing prefix.
    //  - With the Azure runtime interfaces available:
    //      container / deployment id / role name / role instance id /
    //    where the deployment id is the configured one, or AzureUtility's when none is configured.
    //  - Otherwise:
    //      container / fabric node instance name /
    string delimiter = blobClient.DefaultDelimiter;
    string topLevelDirectory;
    string roleName;
    string roleInstanceId;
    string roleDelimiter;

    if (this.azureInterfaceAvailable)
    {
        topLevelDirectory = string.IsNullOrEmpty(this.deploymentId) ? AzureUtility.DeploymentId : this.deploymentId;
        roleName = AzureUtility.RoleName;
        roleInstanceId = AzureUtility.RoleInstanceId;
        roleDelimiter = delimiter;
    }
    else
    {
        topLevelDirectory = this.fabricNodeInstanceName;
        roleName = string.Empty;
        roleInstanceId = string.Empty;
        roleDelimiter = string.Empty;
    }

    string listingPrefix = string.Concat(
        containerName,
        delimiter,
        topLevelDirectory,
        delimiter,
        roleName,
        roleDelimiter,
        roleInstanceId,
        roleDelimiter);

    BlobRequestOptions listOptions = new BlobRequestOptions();
    OperationContext listContext = new OperationContext();
    BlobContinuationToken nextSegment = null;

    do
    {
        BlobResultSegment segment;
        try
        {
            this.perfHelper.ExternalOperationBegin(
                ExternalOperationTime.ExternalOperationType.BlobQuery,
                0);

            // Flat listing: the blobs sit in a virtual directory hierarchy inside the container.
            segment = blobClient.ListBlobsSegmented(
                listingPrefix,
                true,
                BlobListingDetails.All,
                null,
                nextSegment,
                listOptions,
                listContext);

            this.perfHelper.ExternalOperationEnd(
                ExternalOperationTime.ExternalOperationType.BlobQuery,
                0);
        }
        catch (Exception ex)
        {
            this.traceSource.WriteError(
                this.logSourceId,
                "Exception encountered when attempting to enumerate blobs for deletion in container {0} in storage account {1}. Exception information: {2}",
                containerName,
                this.storageAccountFactory.Connection.AccountName,
                ex);

            // Enumeration failed: report it and give up.
            this.BlobQueryAndDeleteExceptionHandler(ex);
            break;
        }

        nextSegment = segment.ContinuationToken;

        // An empty segment goes straight to the loop condition, skipping the stop check below.
        if (segment.Results.Any())
        {
            this.perfHelper.AzureBlobsQueried((ulong)segment.Results.Count());

            foreach (IListBlobItem listedItem in segment.Results)
            {
                // Only blobs selected by ShouldDeleteBlob are deleted.
                if (!this.ShouldDeleteBlob(listedItem, deleteBefore))
                {
                    continue;
                }

                ICloudBlob oldBlob = (ICloudBlob)listedItem;
                try
                {
                    this.perfHelper.ExternalOperationBegin(
                        ExternalOperationTime.ExternalOperationType.BlobDeletion,
                        0);

                    // DeleteIfExists tolerates the blob being removed concurrently by someone else.
                    oldBlob.DeleteIfExists();

                    this.perfHelper.ExternalOperationEnd(
                        ExternalOperationTime.ExternalOperationType.BlobDeletion,
                        0);
                    this.perfHelper.AzureBlobDeleted();
                }
                catch (Exception ex)
                {
                    this.traceSource.WriteError(
                        this.logSourceId,
                        "Exception encountered when attempting to delete blob {0} in container {1} in storage account {2}. Exception information: {3}",
                        oldBlob.Name,
                        containerName,
                        this.storageAccountFactory.Connection.AccountName,
                        ex);

                    // One failed deletion does not stop the others from being attempted.
                    this.BlobQueryAndDeleteExceptionHandler(ex);
                }

                if (this.stopping)
                {
                    this.traceSource.WriteInfo(
                        this.logSourceId,
                        "The consumer is being stopped. Therefore, no more old blobs will be deleted from container {0} in storage account {1}.",
                        containerName,
                        this.storageAccountFactory.Connection.AccountName);
                    break;
                }
            }

            if (this.stopping)
            {
                this.traceSource.WriteInfo(
                    this.logSourceId,
                    "The consumer is being stopped. Therefore, no more blobs will be enumerated for deletion from container {0} in storage account {1}.",
                    containerName,
                    this.storageAccountFactory.Connection.AccountName);
                break;
            }
        }
    }
    while (nextSegment != null);
}