Beispiel #1
0
        /// <summary>
        /// Returns the MD5 hash held in the blob's <c>ContentMD5</c> property, calculating it and
        /// assigning it to that property first when it is not yet set.
        /// </summary>
        /// <param name="dataBlob">Blob wrapper whose <c>AzureBlob.Properties.ContentMD5</c> is read and, if null, populated.</param>
        /// <returns>The existing or newly calculated MD5 hash string.</returns>
        /// <exception cref="MissingRequiredObject">The hash was not set and the calculation produced no value.</exception>
        public string EnsureMd5(DataBlob dataBlob)
        {
            string blobMd5 = dataBlob.AzureBlob.Properties.ContentMD5;
            if (blobMd5 != null)
            {
                return blobMd5;
            }

            // Null-conditional call: invoking ToString() directly on a null result would raise a
            // NullReferenceException before the failure check below could ever report the problem.
            blobMd5 = dataBlob.CalculateMD5Hash()?.ToString();
            if (blobMd5 == null)
            {
                throw new MissingRequiredObject("\nMD5 calculation failed for " + dataBlob.AzureBlob.Name);
            }

            dataBlob.AzureBlob.Properties.ContentMD5 = blobMd5;
            return blobMd5;
        }
        /// <summary>
        /// Moves the evaluated blob into the pending-supervision container.
        /// </summary>
        /// <param name="blobName">Name used to look up the destination blob.</param>
        /// <param name="PendingSupervisionStorageContainerName">Container that receives the blob.</param>
        /// <param name="StorageAccount">Storage account used for the lookup and the move.</param>
        /// <param name="dataEvaluating">Wrapper around the source blob being moved.</param>
        /// <exception cref="MissingRequiredObject">No destination blob reference could be obtained.</exception>
        private void EvaluationFailed(string blobName, string PendingSupervisionStorageContainerName, CloudStorageAccount StorageAccount, DataBlob dataEvaluating)
        {
            CloudBlockBlob pendingSupervision = _Search.GetBlob(StorageAccount, PendingSupervisionStorageContainerName, blobName);
            if (pendingSupervision == null)
            {
                throw new MissingRequiredObject("\nMissing pendingSupervision " + blobName + " destination blob in container " + PendingSupervisionStorageContainerName);
            }

            // Still a blocking call (the signature is synchronous), but GetAwaiter().GetResult()
            // rethrows the original exception instead of wrapping it in an AggregateException,
            // so whoever logs e.Message sees the real failure reason.
            _Engine.MoveAzureBlobToAzureBlob(StorageAccount, dataEvaluating.AzureBlob, pendingSupervision).GetAwaiter().GetResult();
        }
        /// <summary>
        /// Copies the evaluated blob to the evaluated-data container, copies a random sample of blobs
        /// to the model-validation container as well, and then deletes the source blob.
        /// </summary>
        /// <param name="blobName">Name used to look up the destination blobs.</param>
        /// <param name="ModelVerificationPercent">Sampling probability compared against a random value in [0, 1); 0 disables sampling.</param>
        /// <param name="ModelValidationStorageContainerName">Container that receives sampled blobs.</param>
        /// <param name="EvaluatedDataStorageContainerName">Container that receives every blob passed to this method.</param>
        /// <param name="StorageAccount">Storage account used for lookups and copies.</param>
        /// <param name="dataEvaluating">Wrapper around the source blob.</param>
        /// <exception cref="MissingRequiredObject">No evaluated-data destination blob reference could be obtained.</exception>
        private void EvaluationPassed(string blobName, double ModelVerificationPercent, string ModelValidationStorageContainerName, string EvaluatedDataStorageContainerName, CloudStorageAccount StorageAccount, DataBlob dataEvaluating)
        {
            CloudBlockBlob evaluatedData = _Search.GetBlob(StorageAccount, EvaluatedDataStorageContainerName, blobName);
            if (evaluatedData == null)
            {
                throw new MissingRequiredObject("\nMissing evaluatedData " + blobName + " destination blob in container " + EvaluatedDataStorageContainerName);
            }

            // GetAwaiter().GetResult() blocks like Wait() but surfaces the original exception
            // rather than an AggregateException wrapper.
            _Engine.CopyAzureBlobToAzureBlob(StorageAccount, dataEvaluating.AzureBlob, evaluatedData).GetAwaiter().GetResult();

            // Pick a random sample of successfully analyzed blobs and submit them for supervision verification.
            // Strict '<' on the unrounded value: NextDouble() is in [0, 1), so 0 never samples and 1 always does.
            // Rounding to two decimals with '<=' would still sample roughly 0.5% of blobs at a setting of 0.
            Random rnd = new Random();
            if (rnd.NextDouble() < ModelVerificationPercent)
            {
                CloudBlockBlob modelValidation = _Search.GetBlob(StorageAccount, ModelValidationStorageContainerName, blobName);
                if (modelValidation == null)
                {
                    // Best effort: a missing validation destination only skips the sample.
                    _Log.LogInformation("\nWarning: Model validation skipped for " + blobName + " because of missing modelValidation " + blobName + " destination blob in container " + ModelValidationStorageContainerName);
                }
                else
                {
                    _Engine.CopyAzureBlobToAzureBlob(StorageAccount, dataEvaluating.AzureBlob, modelValidation).GetAwaiter().GetResult();
                }
            }

            // Wait for the delete so it has finished before returning and any failure reaches the caller
            // instead of being lost on an unobserved task.
            dataEvaluating.AzureBlob.DeleteIfExistsAsync().GetAwaiter().GetResult();
        }
        /// <summary>
        /// Submits each block blob in the labeled-data container to the model's labeled-data endpoint,
        /// passing the blob URL and the labels read from the JSON blob bound to it by MD5 hash.
        /// </summary>
        /// <returns>A fixed completion message; per-blob results are written to the log.</returns>
        /// <exception cref="MissingRequiredObject">The endpoint returned an empty response for a blob.</exception>
        public string AddLabeledData()
        {
            CloudStorageAccount StorageAccount = _Engine.StorageAccount;
            CloudBlobClient     BlobClient     = StorageAccount.CreateCloudBlobClient();
            string             labeledDataStorageContainerName = _Engine.GetEnvironmentVariable("labeledDataStorageContainerName", _Log);
            CloudBlobContainer LabeledDataContainer            = BlobClient.GetContainerReference(labeledDataStorageContainerName);

            foreach (IListBlobItem item in LabeledDataContainer.ListBlobs(null, false))
            {
                // Only block blobs are processed; every other listing entry is skipped.
                if (item.GetType() != typeof(CloudBlockBlob))
                {
                    continue;
                }

                CloudBlockBlob dataCloudBlockBlob = (CloudBlockBlob)item;

                // Read the hash without dereferencing it so a blob that has no ContentMD5 yet
                // reaches the calculation below instead of failing with a NullReferenceException.
                string BindingHash = dataCloudBlockBlob.Properties.ContentMD5;
                if (BindingHash == null)
                {
                    //compute the file hash as this will be added to the meta data to allow for file version validation
                    BindingHash = new DataBlob(dataCloudBlockBlob, _Engine, _Search, _Log).CalculateMD5Hash()?.ToString();
                    if (BindingHash == null)
                    {
                        _Log.LogInformation("\nWarning: Blob Hash calculation failed and will not be included in file information blob, continuing operation.");
                        // Without a hash the bound JSON cannot be looked up, so move on to the next blob.
                        continue;
                    }

                    dataCloudBlockBlob.Properties.ContentMD5 = BindingHash;
                }

                //trim the trailing "equals" padding off the hash or the http send will fail either using the client or raw http calls.
                //TrimEnd removes only padding characters, so a hash with fewer than two of them is not truncated.
                BindingHash = BindingHash.TrimEnd('=');

                //Get the content from the bound JSON file and instantiate a JsonBlob class then retrieve the labels collection from the Json to add to the image.
                JsonBlob boundJson = (JsonBlob)_Search.GetBlob("json", BindingHash);
                //Note you cannot pull the URL from the JSON blob because it will have the original URL from the first container when the blob was added to ML Professoar
                string labeledDataUrl   = dataCloudBlockBlob.StorageUri.PrimaryUri.ToString();
                // Serialized once and reused for both the request and the completion log entry.
                string serializedLabels = JsonConvert.SerializeObject(boundJson.Labels);
                string addLabeledDataParameters = $"?dataBlobUrl={labeledDataUrl}&imageLabels={Uri.EscapeDataString(serializedLabels)}";

                //construct and call model URL then fetch response
                // the model always sends the label set in the message body with the name LabelsJson.  If your model needs other values in the URL then use
                //{ {environment variable name}}.
                // So the example load labels function in the sample model package would look like this:
                // https://branddetectionapp.azurewebsites.net/api/loadimagetags/?projectID={{ProjectID}}
                // The orchestration engine appends the labels json file to the message body.
                // http://localhost:7071/api/LoadImageTags/?projectID=8d9d12d1-5d5c-4893-b915-4b5b3201f78e&labelsJson={%22Labels%22:[%22Hemlock%22,%22Japanese%20Cherry%22]}

                string labeledDataServiceEndpoint = _Engine.GetEnvironmentVariable("LabeledDataServiceEndpoint", _Log);
                string addLabeledDataUrl          = _Engine.ConstructModelRequestUrl(labeledDataServiceEndpoint, addLabeledDataParameters);
                _Log.LogInformation($"\n Getting response from {addLabeledDataUrl}");
                _Response       = _Client.GetAsync(addLabeledDataUrl).Result;
                _ResponseString = _Response.Content.ReadAsStringAsync().Result;
                if (string.IsNullOrEmpty(_ResponseString))
                {
                    throw new MissingRequiredObject($"\nresponseString not generated from URL: {addLabeledDataUrl}");
                }

                //the code below is for passing labels and content as http content and not on the URL string.
                //Format the Data Labels content
                //HttpRequestMessage Request = new HttpRequestMessage(HttpMethod.Post, new Uri(AddLabeledDataUrl));
                //HttpContent DataLabelsStringContent = new StringContent(trainingDataLabels, Encoding.UTF8, "application/x-www-form-urlencoded");
                //MultipartFormDataContent LabeledDataContent = new MultipartFormDataContent();
                //LabeledDataContent.Add(DataLabelsStringContent, "LabeledData");

                //Format the data content
                //*****TODO***** move to an async architecture
                //*****TODO***** need to decide if there is value in sending the data as a binary stream in the post or if requiring the model data scientist to accept URLs is sufficient.  If accessing the data blob with a SAS url requires Azure classes then create a configuration to pass the data as a stream in the post.  If there is then this should be a configurable option.
                //MemoryStream dataBlobMemStream = new MemoryStream();
                //dataBlob.DownloadToStream(dataBlobMemStream);
                //HttpContent LabeledDataHttpContent = new StreamContent(dataBlobMemStream);
                //LabeledDataContent.Add(LabeledDataContent, "LabeledData");

                //Make the http call and get a response
                //string AddLabelingTagsEndpoint = Engine.GetEnvironmentVariable("LabeledDataServiceEndpoint", log);
                //if (string.IsNullOrEmpty(AddLabelingTagsEndpoint)) throw (new EnvironmentVariableNotSetException("LabeledDataServiceEndpoint environment variable not set"));
                //string ResponseString = Helper.GetEvaluationResponseString(AddLabelingTagsEndpoint, LabeledDataContent, log);
                //if (string.IsNullOrEmpty(ResponseString)) throw (new MissingRequiredObject("\nresponseString not generated from URL: " + AddLabelingTagsEndpoint));

                _Log.LogInformation($"Completed call to add blob: {dataCloudBlockBlob.Name} with labels: {serializedLabels} to model.  The response string was: {_ResponseString}.");
            }

            return "Completed execution of AddLabeledData.  See logs for success/fail details.";
        }
        /// <summary>
        /// Evaluates one blob from the pending-evaluation container against the model service, records
        /// the evaluation pass in the blob's JSON record, and routes the blob according to the returned confidence.
        /// </summary>
        /// <param name="blobName">Name of the blob in the pending-evaluation container.</param>
        /// <returns>
        /// A completion message naming the blob. Failures are caught and logged rather than rethrown,
        /// so the same message is returned whether or not the evaluation succeeded.
        /// </returns>
        public async Task <string> EvaluateData(string blobName)
        {
            try
            {
                double ModelVerificationPercent            = 0;
                string ModelValidationStorageContainerName = "";

                string StorageConnection = _Engine.GetEnvironmentVariable("AzureWebJobsStorage", _Log);
                string PendingEvaluationStorageContainerName = _Engine.GetEnvironmentVariable("pendingEvaluationStorageContainerName", _Log);
                string EvaluatedDataStorageContainerName     = _Engine.GetEnvironmentVariable("evaluatedDataStorageContainerName", _Log);
                string JsonStorageContainerName = _Engine.GetEnvironmentVariable("jsonStorageContainerName", _Log);
                string PendingSupervisionStorageContainerName = _Engine.GetEnvironmentVariable("pendingSupervisionStorageContainerName", _Log);
                string ConfidenceJsonPath  = _Engine.GetEnvironmentVariable("confidenceJSONPath", _Log);
                double ConfidenceThreshold = Convert.ToDouble(_Engine.GetEnvironmentVariable("confidenceThreshold", _Log));

                string modelType = _Engine.GetEnvironmentVariable("modelType", _Log);
                if (modelType == "Trained")
                {
                    // NOTE(review): the labeledData and pendingNewModel values are not used below; the lookups
                    // are kept in case GetEnvironmentVariable validates that the setting exists - confirm.
                    _Engine.GetEnvironmentVariable("labeledDataStorageContainerName", _Log);
                    ModelValidationStorageContainerName = _Engine.GetEnvironmentVariable("modelValidationStorageContainerName", _Log);
                    _Engine.GetEnvironmentVariable("pendingNewModelStorageContainerName", _Log);
                    ModelVerificationPercent = Convert.ToDouble(_Engine.GetEnvironmentVariable("modelVerificationPercentage", _Log));
                }

                //------------------------This section retrieves the blob needing evaluation and calls the evaluation service for processing.-----------------------

                // Create Reference to Azure Storage Account and the container for data that is pending evaluation by the model.
                CloudStorageAccount StorageAccount = CloudStorageAccount.Parse(StorageConnection);
                CloudBlobClient     BlobClient     = StorageAccount.CreateCloudBlobClient();
                CloudBlobContainer  Container      = BlobClient.GetContainerReference(PendingEvaluationStorageContainerName);
                CloudBlockBlob      rawDataBlob    = Container.GetBlockBlobReference(blobName);
                DataBlob            dataEvaluating = new DataBlob(rawDataBlob, _Engine, _Search, _Log);

                //compute the file hash as this will be added to the meta data to allow for file version validation
                //the blob has to be "touched" or the properties will all be null
                if (dataEvaluating.AzureBlob.Exists() != true)
                {
                    throw new MissingRequiredObject($"\ndataEvaluating does not exist {dataEvaluating.AzureBlob.Name}");
                }

                string blobMd5 = _Engine.EnsureMd5(dataEvaluating);

                //****Currently only working with public access set on blob folders
                //Generate a URL with SAS token to submit to analyze image API
                //string dataEvaluatingSas = GetBlobSharedAccessSignature(dataEvaluating);
                string DataEvaluatingUrl = dataEvaluating.AzureBlob.Uri.ToString(); //+ dataEvaluatingSas;

                //get environment variables used to construct the model request URL
                string dataEvaluationServiceEndpoint = _Engine.GetEnvironmentVariable("DataEvaluationServiceEndpoint", _Log);
                string evaluationDataParameterName   = _Engine.GetEnvironmentVariable("evaluationDataParameterName", _Log);
                string parameters      = $"?{evaluationDataParameterName}={DataEvaluatingUrl}";
                string evaluateDataUrl = _Engine.ConstructModelRequestUrl(dataEvaluationServiceEndpoint, parameters);

                int    retryLoops     = 0;
                string responseString = "";

                // The request body is an empty multipart container (the blob is passed by URL);
                // it is disposed once the retry loop is finished with it.
                //package the file contents to send as http request content
                //MemoryStream DataEvaluatingContent = new MemoryStream();
                //DataEvaluating.AzureBlob.DownloadToStreamAsync(DataEvaluatingContent);
                //HttpContent DataEvaluatingStream = new StreamContent(DataEvaluatingContent);
                //content.Add(DataEvaluatingStream, "name");
                using (var content = new MultipartFormDataContent())
                {
                    do
                    {
                        //Make a request to the model service passing the file URL
                        responseString = _Engine.GetHttpResponseString(evaluateDataUrl, content);
                        if (responseString.Contains("iteration"))
                        {
                            _Log.LogInformation($"\nEvaluation response: {responseString}.");
                            break;
                        }
                        retryLoops++;
                        await Task.Delay(1000);

                        if (retryLoops == 5)
                        {
                            _Log.LogInformation($"\nEvaluation of {evaluateDataUrl} failed with 5 attempts.");
                        }
                    } while (retryLoops < 5);
                }

                string    StrConfidence    = null;
                double    Confidence       = 0;
                JProperty responseProperty = new JProperty("Response", responseString);

                // "Model not trained." is not JSON; it is treated as zero confidence.
                if (responseString != "Model not trained.")
                {
                    //deserialize response JSON, get confidence score and compare with confidence threshold
                    // Parsing happens inside the try so that a non-JSON response is reported together with
                    // the response text and request URL, not as a bare parser error.
                    JObject analysisJson = null;
                    try
                    {
                        analysisJson  = JObject.Parse(responseString);
                        StrConfidence = (string)analysisJson.SelectToken(ConfidenceJsonPath);
                    }
                    catch (Exception)
                    {
                        throw new MissingRequiredObject($"\nInvalid response string {responseString} generated from URL: {evaluateDataUrl}.");
                    }

                    if (StrConfidence == null)
                    {
                        throw new MissingRequiredObject("\nNo confidence value at " + ConfidenceJsonPath + " from environment variable ConfidenceJSONPath.");
                    }
                    Confidence = (double)analysisJson.SelectToken(ConfidenceJsonPath);
                }

                //----------------------------This section collects information about the blob being analyzed and packages it in JSON that is then written to blob storage for later processing-----------------------------------

                _Log.LogInformation("\nStarting construction of json blob.");

                //create environment JSON object
                JProperty environmentProperty = _Engine.GetEnvironmentJson(_Log);
                JProperty evaluationPass      = new JProperty("pass",
                                                              new JObject(
                                                                  new JProperty("date", DateTime.Now),
                                                                  environmentProperty,
                                                                  new JProperty("request", evaluateDataUrl),
                                                                  responseProperty
                                                                  )
                                                              );

                //Note: all json files get written to the same container as they are all accessed either by discrete name or by azure search index either GUID or Hash.
                // The JsonBlob is constructed once and reused; a failed construction is taken to mean
                // that no record exists yet and a new one is created below.
                // NOTE(review): every exception type is treated as "not found" here - confirm that a
                // transient lookup failure cannot reach this catch and produce a duplicate record.
                JsonBlob jsonBlob = null;
                try
                {
                    jsonBlob = new JsonBlob(blobMd5, _Engine, _Search, _Log);
                }
                catch (Exception)
                {
                    _Log.LogInformation($"\nNo JSON blob found in seach index, creating new JSON blob for blob {dataEvaluating.AzureBlob.Name}.");
                }

                // If the Json blob already exists then update the blob with this pass iteration information
                if (jsonBlob != null)
                {
                    JObject jsonBlobJObject   = JObject.Parse(jsonBlob.AzureBlob.DownloadText());
                    JArray  evaluationHistory = (JArray)jsonBlobJObject.SelectToken("passes");
                    if (evaluationHistory == null)
                    {
                        // An existing record without a "passes" array gets one instead of failing on Add.
                        evaluationHistory = new JArray();
                        jsonBlobJObject["passes"] = evaluationHistory;
                    }

                    JObject evaluationsObject = new JObject
                    {
                        evaluationPass
                    };
                    evaluationHistory.Add(evaluationsObject);
                    string serializedJsonBlob = JsonConvert.SerializeObject(jsonBlobJObject, Formatting.Indented, new JsonSerializerSettings {
                    });
                    using (Stream jsonBlobMemStream = new MemoryStream(Encoding.UTF8.GetBytes(serializedJsonBlob)))
                    {
                        if (jsonBlobMemStream.Length == 0)
                        {
                            throw new ZeroLengthFileException("\nencoded JSON memory stream is zero length and cannot be writted to blob storage");
                        }
                        await jsonBlob.AzureBlob.UploadFromStreamAsync(jsonBlobMemStream);
                    }
                }

                // If the Json blob does not exist create one
                else
                {
                    // Add blob info to Json
                    JObject BlobAnalysis =
                        new JObject(
                            new JProperty("id", Guid.NewGuid().ToString()),
                            new JProperty("IsDeleted", false),
                            new JProperty("blobInfo",
                                          new JObject(
                                              new JProperty("name", blobName),
                                              new JProperty("url", dataEvaluating.AzureBlob.Uri.ToString()),
                                              new JProperty("modified", dataEvaluating.AzureBlob.Properties.LastModified.ToString()),
                                              new JProperty("hash", blobMd5)
                                              )
                                          )
                            );

                    // Add pass information to Json blob
                    JArray  evaluations       = new JArray();
                    JObject evaluationsObject = new JObject
                    {
                        evaluationPass
                    };
                    evaluations.Add(evaluationsObject);
                    JProperty evaluationPasses = new JProperty("passes", evaluations);
                    BlobAnalysis.Add(evaluationPasses);
                    CloudBlockBlob JsonCloudBlob = _Search.GetBlob(StorageAccount, JsonStorageContainerName, (string)BlobAnalysis.SelectToken("id") + ".json");
                    JsonCloudBlob.Properties.ContentType = "application/json";
                    string serializedJson = JsonConvert.SerializeObject(BlobAnalysis, Newtonsoft.Json.Formatting.Indented, new JsonSerializerSettings {
                    });
                    using (Stream MemStream = new MemoryStream(Encoding.UTF8.GetBytes(serializedJson)))
                    {
                        if (MemStream.Length == 0)
                        {
                            throw new ZeroLengthFileException("\nencoded JSON memory stream is zero length and cannot be writted to blob storage");
                        }
                        // Awaited upload, matching the update branch, so this async method does not block on I/O.
                        await JsonCloudBlob.UploadFromStreamAsync(MemStream);
                    }
                }


                //--------------------------------This section processes the results of the analysis and transfers the blob to the container responsible for the next appropriate stage of processing.-------------------------------

                //model successfully analyzed content
                if (Confidence >= ConfidenceThreshold)
                {
                    EvaluationPassed(blobName, ModelVerificationPercent, ModelValidationStorageContainerName, EvaluatedDataStorageContainerName, StorageAccount, dataEvaluating);
                }

                //model was not sufficiently confident in its analysis
                else
                {
                    EvaluationFailed(blobName, PendingSupervisionStorageContainerName, StorageAccount, dataEvaluating);
                }

                _Log.LogInformation($"C# Blob trigger function Processed blob\n Name:{blobName}");
            }
            catch (MissingRequiredObject e)
            {
                _Log.LogInformation("\n" + blobName + " could not be analyzed because of a MissingRequiredObject with message: " + e.Message);
            }
            catch (Exception e)
            {
                _Log.LogInformation("\n" + blobName + " could not be analyzed with message: " + e.Message);
            }
            return $"Evaluate data completed evaluating data blob: {blobName}";
        }
        public string EvaluateData(string blobName)
        {
            try
            {
                string PendingEvaluationStorageContainerName = _Engine.GetEnvironmentVariable("pendingEvaluationStorageContainerName", _Log);
                string EvaluatedDataStorageContainerName     = _Engine.GetEnvironmentVariable("evaluatedDataStorageContainerName", _Log);
                string JsonStorageContainerName = _Engine.GetEnvironmentVariable("jsonStorageContainerName", _Log);
                string PendingSupervisionStorageContainerName = _Engine.GetEnvironmentVariable("pendingSupervisionStorageContainerName", _Log);
                string LabeledDataStorageContainerName        = _Engine.GetEnvironmentVariable("labeledDataStorageContainerName", _Log);
                string ModelValidationStorageContainerName    = _Engine.GetEnvironmentVariable("modelValidationStorageContainerName", _Log);
                string PendingNewModelStorageContainerName    = _Engine.GetEnvironmentVariable("pendingNewModelStorageContainerName", _Log);
                string StorageConnection        = _Engine.GetEnvironmentVariable("AzureWebJobsStorage", _Log);
                string ConfidenceJsonPath       = _Engine.GetEnvironmentVariable("confidenceJSONPath", _Log);
                string DataTagsBlobName         = _Engine.GetEnvironmentVariable("dataTagsBlobName", _Log);
                double ConfidenceThreshold      = Convert.ToDouble(_Engine.GetEnvironmentVariable("confidenceThreshold", _Log));
                double ModelVerificationPercent = Convert.ToDouble(_Engine.GetEnvironmentVariable("modelVerificationPercentage", _Log));

                //------------------------This section retrieves the blob needing evaluation and calls the evaluation service for processing.-----------------------

                // Create Reference to Azure Storage Account and the container for data that is pending evaluation by the model.
                CloudStorageAccount StorageAccount = CloudStorageAccount.Parse(StorageConnection);
                CloudBlobClient     BlobClient     = StorageAccount.CreateCloudBlobClient();
                CloudBlobContainer  Container      = BlobClient.GetContainerReference(PendingEvaluationStorageContainerName);

                //Get a reference to a container, if the container does not exist create one then get the reference to the blob you want to evaluate."
                CloudBlockBlob RawDataBlob    = _Search.GetBlob(StorageAccount, JsonStorageContainerName, blobName, _Log);
                DataBlob       DataEvaluating = new DataBlob(RawDataBlob.Properties.ContentMD5, _Log);
                if (DataEvaluating == null)
                {
                    throw (new MissingRequiredObject("\nMissing dataEvaluating blob object."));
                }

                //compute the file hash as this will be added to the meta data to allow for file version validation
                string BlobMd5 = FrameworkBlob.CalculateMD5Hash(DataEvaluating.ToString());
                if (BlobMd5 == null)
                {
                    _Log.LogInformation("\nWarning: Blob Hash calculation failed and will not be included in file information blob, continuing operation.");
                }
                else
                {
                    DataEvaluating.AzureBlob.Properties.ContentMD5 = BlobMd5;
                }

                //****Currently only working with public access set on blob folders
                //Generate a URL with SAS token to submit to analyze image API
                //string dataEvaluatingSas = GetBlobSharedAccessSignature(dataEvaluating);
                string DataEvaluatingUrl = DataEvaluating.AzureBlob.Uri.ToString(); //+ dataEvaluatingSas;
                //string dataEvaluatingUrl = "test";

                //package the file contents to send as http request content
                //MemoryStream DataEvaluatingContent = new MemoryStream();
                //DataEvaluating.AzureBlob.DownloadToStreamAsync(DataEvaluatingContent);
                //HttpContent DataEvaluatingStream = new StreamContent(DataEvaluatingContent);
                var content = new MultipartFormDataContent();
                //content.Add(DataEvaluatingStream, "name");

                //Make a request to the model service passing the file URL
                string ResponseString = Helper.GetEvaluationResponseString(DataEvaluatingUrl, content, _Log);
                if (ResponseString == "")
                {
                    throw (new MissingRequiredObject("\nresponseString not generated from URL: " + DataEvaluatingUrl));
                }

                //deserialize response JSON, get confidence score and compare with confidence threshold
                JObject AnalysisJson  = JObject.Parse(ResponseString);
                string  StrConfidence = (string)AnalysisJson.SelectToken(ConfidenceJsonPath);
                double  Confidence    = (double)AnalysisJson.SelectToken(ConfidenceJsonPath);
                if (StrConfidence == null)
                {
                    throw (new MissingRequiredObject("\nNo confidence value at " + ConfidenceJsonPath + " from environment variable ConfidenceJSONPath."));
                }

                //--------------------------------This section processes the results of the analysis and transferes the blob to the container responsible for the next appropriate stage of processing.-------------------------------

                //model successfully analyzed content
                if (Confidence >= ConfidenceThreshold)
                {
                    CloudBlockBlob EvaluatedData = _Search.GetBlob(StorageAccount, EvaluatedDataStorageContainerName, blobName, _Log);
                    if (EvaluatedData == null)
                    {
                        throw (new MissingRequiredObject("\nMissing evaluatedData " + blobName + " destination blob in container " + EvaluatedDataStorageContainerName));
                    }
                    _Engine.CopyAzureBlobToAzureBlob(StorageAccount, DataEvaluating.AzureBlob, EvaluatedData, _Log).Wait();

                    //pick a random number of successfully analyzed content blobs and submit them for supervision verification.
                    Random Rnd = new Random();
                    if (Math.Round(Rnd.NextDouble(), 2) <= ModelVerificationPercent)
                    {
                        CloudBlockBlob ModelValidation = _Search.GetBlob(StorageAccount, ModelValidationStorageContainerName, blobName, _Log);
                        if (ModelValidation == null)
                        {
                            _Log.LogInformation("\nWarning: Model validation skipped for " + blobName + " because of missing evaluatedData " + blobName + " destination blob in container " + ModelValidationStorageContainerName);
                        }
                        else
                        {
                            _Engine.MoveAzureBlobToAzureBlob(StorageAccount, DataEvaluating.AzureBlob, ModelValidation, _Log).Wait();
                        }
                    }
                    DataEvaluating.AzureBlob.DeleteIfExistsAsync();
                }

                //model was not sufficiently confident in its analysis
                else
                {
                    CloudBlockBlob PendingSupervision = _Search.GetBlob(StorageAccount, PendingSupervisionStorageContainerName, blobName, _Log);
                    if (PendingSupervision == null)
                    {
                        throw (new MissingRequiredObject("\nMissing pendingSupervision " + blobName + " destination blob in container " + PendingSupervisionStorageContainerName));
                    }

                    _Engine.MoveAzureBlobToAzureBlob(StorageAccount, DataEvaluating.AzureBlob, PendingSupervision, _Log).Wait();
                }

                //----------------------------This section collects information about the blob being analyzied and packages it in JSON that is then written to blob storage for later processing-----------------------------------

                JObject BlobAnalysis =
                    new JObject(
                        new JProperty("id", Guid.NewGuid().ToString()),
                        new JProperty("blobInfo",
                                      new JObject(
                                          new JProperty("name", blobName),
                                          new JProperty("url", DataEvaluating.AzureBlob.Uri.ToString()),
                                          new JProperty("modified", DataEvaluating.AzureBlob.Properties.LastModified.ToString()),
                                          new JProperty("hash", BlobMd5)
                                          )
                                      )
                        );

                //create environment JSON object
                JProperty BlobEnvironment = _Engine.GetEnvironmentJson(_Log);

                BlobAnalysis.Add(BlobEnvironment);
                BlobAnalysis.Merge(AnalysisJson);

                //Note: all json files get writted to the same container as they are all accessed either by discrete name or by azure search index either GUID or Hash.
                CloudBlockBlob JsonBlob = _Search.GetBlob(StorageAccount, JsonStorageContainerName, (string)BlobAnalysis.SelectToken("blobInfo.id") + ".json", _Log);
                JsonBlob.Properties.ContentType = "application/json";
                string SerializedJson = JsonConvert.SerializeObject(BlobAnalysis, Newtonsoft.Json.Formatting.Indented, new JsonSerializerSettings {
                });
                Stream MemStream      = new MemoryStream(Encoding.UTF8.GetBytes(SerializedJson));
                if (MemStream.Length != 0)
                {
                    JsonBlob.UploadFromStreamAsync(MemStream);
                }
                else
                {
                    throw (new ZeroLengthFileException("\nencoded JSON memory stream is zero length and cannot be writted to blob storage"));
                }
                _Log.LogInformation($"C# Blob trigger function Processed blob\n Name:{blobName}");
            }
            catch (MissingRequiredObject e)
            {
                _Log.LogInformation("\n" + blobName + " could not be analyzed with message: " + e.Message);
            }
            catch (Exception e)
            {
                _Log.LogInformation("\n" + blobName + " could not be analyzed with message: " + e.Message);
            }
            return($"Evaluate data completed evaluating data blob: {blobName}");
        }
        /// <summary>
        /// Handles a blob whose evaluation confidence was below the threshold: moves it to the
        /// pending-supervision container and appends a state change to its bound JSON blob.
        /// </summary>
        /// <param name="blobName">Name used for the destination blob in the pending-supervision container.</param>
        /// <param name="pendingSupervisionStorageContainerName">Destination container name; also recorded as the new state.</param>
        /// <param name="storageAccount">Storage account that holds the source and destination containers.</param>
        /// <param name="dataEvaluating">The data blob that was evaluated.</param>
        /// <returns>A task that completes once the blob has been moved and the JSON blob uploaded.</returns>
        /// <exception cref="MissingRequiredObject">The destination blob reference could not be obtained.</exception>
        private async Task EvaluationFailed(string blobName, string pendingSupervisionStorageContainerName, CloudStorageAccount storageAccount, DataBlob dataEvaluating)
        {
            CloudBlockBlob pendingSupervision = _Search.GetBlob(storageAccount, pendingSupervisionStorageContainerName, blobName);

            if (pendingSupervision == null)
            {
                throw (new MissingRequiredObject($"\nMissing pendingSupervision {blobName} destination blob in container {pendingSupervisionStorageContainerName}"));
            }

            // Awaited instead of blocked on with Wait(): no thread is tied up and a failure
            // surfaces as the original exception rather than wrapped in an AggregateException.
            await _Engine.MoveAzureBlobToAzureBlob(storageAccount, dataEvaluating.AzureBlob, pendingSupervision);

            // Hydrate the JSON blob bound to this data blob (looked up by content MD5).
            JsonBlob jsonBlob = new JsonBlob(dataEvaluating.AzureBlob.Properties.ContentMD5, _Engine, _Search, _Log);
            JObject jsonBlobJObject = JObject.Parse(jsonBlob.AzureBlob.DownloadText());

            // Add a state change to the JSON blob.
            JArray stateHistory = (JArray)jsonBlobJObject.SelectToken("StateHistory");
            AddStateChange(pendingSupervisionStorageContainerName, stateHistory);

            // Upload blob changes to the cloud.
            await _Engine.UploadJsonBlob(jsonBlob.AzureBlob, jsonBlobJObject);
        }

        /// <summary>
        /// Handles a blob whose evaluation confidence met the threshold: copies it to the
        /// evaluated-data container, appends a state change to its bound JSON blob, copies a
        /// random sample to the model-validation container, and deletes the source blob.
        /// </summary>
        /// <param name="modelVerificationPercent">Probability threshold (compared against a random value in [0, 1)) for also copying the blob to model validation.</param>
        /// <param name="modelValidationStorageContainerName">Container that receives the sampled blobs.</param>
        /// <param name="evaluatedDataStorageContainerName">Destination container name; also recorded as the new state.</param>
        /// <param name="storageAccount">Storage account that holds the source and destination containers.</param>
        /// <param name="dataEvaluating">The data blob that was evaluated.</param>
        /// <returns>A task that completes once all copies, the JSON upload and the delete have finished.</returns>
        /// <exception cref="MissingRequiredObject">The evaluated-data destination blob reference could not be obtained.</exception>
        private async Task EvaluationPassed(double modelVerificationPercent, string modelValidationStorageContainerName, string evaluatedDataStorageContainerName, CloudStorageAccount storageAccount, DataBlob dataEvaluating)
        {
            CloudBlockBlob evaluatedData = _Search.GetBlob(storageAccount, evaluatedDataStorageContainerName, dataEvaluating.AzureBlob.Name);

            if (evaluatedData == null)
            {
                throw (new MissingRequiredObject($"\nevaluatedData blob {dataEvaluating.AzureBlob.Name} destination blob not created in container {evaluatedDataStorageContainerName}"));
            }

            await _Engine.CopyAzureBlobToAzureBlob(storageAccount, dataEvaluating.AzureBlob, evaluatedData);

            // Hydrate the JSON blob bound to this data blob (looked up by content MD5).
            JsonBlob jsonBlob = new JsonBlob(dataEvaluating.AzureBlob.Properties.ContentMD5, _Engine, _Search, _Log);
            JObject jsonBlobJObject = JObject.Parse(jsonBlob.AzureBlob.DownloadText());

            // Add a state change to the JSON blob.
            JArray stateHistory = (JArray)jsonBlobJObject.SelectToken("StateHistory");
            AddStateChange(evaluatedDataStorageContainerName, stateHistory);

            // Upload blob changes to the cloud.
            await _Engine.UploadJsonBlob(jsonBlob.AzureBlob, jsonBlobJObject);

            // Pick a random sample of successfully analyzed blobs and submit them for supervision verification.
            Random rnd = new Random();

            if (Math.Round(rnd.NextDouble(), 2) <= modelVerificationPercent)
            {
                CloudBlockBlob modelValidation = _Search.GetBlob(storageAccount, modelValidationStorageContainerName, dataEvaluating.AzureBlob.Name);
                if (modelValidation == null)
                {
                    // Validation sampling is best effort: a missing destination is logged, not fatal.
                    _Log.LogInformation($"\nWarning: Model validation skipped for {dataEvaluating.AzureBlob.Name} because {dataEvaluating.AzureBlob.Name} not created in destination blob in container {modelValidationStorageContainerName}");
                }
                else
                {
                    await _Engine.CopyAzureBlobToAzureBlob(storageAccount, dataEvaluating.AzureBlob, modelValidation);
                }
            }

            // The source is removed only after every copy above has completed.
            await dataEvaluating.AzureBlob.DeleteIfExistsAsync();
        }

        /// <summary>
        /// Evaluates one blob from the pending-evaluation container: sends its URL to the model
        /// evaluation endpoint, records the evaluation pass in the blob's bound JSON blob, and
        /// routes the blob to the evaluated-data or pending-supervision container depending on
        /// whether the reported confidence meets the configured threshold.
        /// </summary>
        /// <param name="blobName">Name of the blob inside the pending-evaluation container.</param>
        /// <returns>
        /// A completion message. Failures are logged and swallowed, so the same message is
        /// returned whether or not the evaluation succeeded.
        /// </returns>
        public async Task <string> EvaluateData(string blobName)
        {
            try
            {
                double modelVerificationPercent = 0;
                string modelValidationStorageContainerName = "";

                string storageConnection = _Engine.GetEnvironmentVariable("AzureWebJobsStorage", _Log);
                string pendingEvaluationStorageContainerName = _Engine.GetEnvironmentVariable("pendingEvaluationStorageContainerName", _Log);
                string evaluatedDataStorageContainerName = _Engine.GetEnvironmentVariable("evaluatedDataStorageContainerName", _Log);
                string jsonStorageContainerName = _Engine.GetEnvironmentVariable("jsonStorageContainerName", _Log);
                string pendingSupervisionStorageContainerName = _Engine.GetEnvironmentVariable("pendingSupervisionStorageContainerName", _Log);
                string confidenceJsonPath = _Engine.GetEnvironmentVariable("confidenceJSONPath", _Log);

                // Configuration values are machine-readable, so parse them culture-invariantly
                // instead of depending on the host's current culture.
                double confidenceThreshold = Convert.ToDouble(_Engine.GetEnvironmentVariable("confidenceThreshold", _Log), System.Globalization.CultureInfo.InvariantCulture);

                string modelType = _Engine.GetEnvironmentVariable("modelType", _Log);
                if (modelType == "Trained")
                {
                    // NOTE(review): these two values are not used in this method; the reads are kept
                    // in case GetEnvironmentVariable validates or logs missing settings - confirm.
                    _Engine.GetEnvironmentVariable("labeledDataStorageContainerName", _Log);
                    _Engine.GetEnvironmentVariable("pendingNewModelStorageContainerName", _Log);

                    modelValidationStorageContainerName = _Engine.GetEnvironmentVariable("modelValidationStorageContainerName", _Log);
                    modelVerificationPercent = Convert.ToDouble(_Engine.GetEnvironmentVariable("modelVerificationPercentage", _Log), System.Globalization.CultureInfo.InvariantCulture);
                }

                //------------------------This section retrieves the blob needing evaluation and calls the evaluation service for processing.-----------------------

                // Create a reference to the storage account and the container for data that is pending evaluation by the model.
                CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnection);
                CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();
                CloudBlobContainer container = blobClient.GetContainerReference(pendingEvaluationStorageContainerName);
                CloudBlockBlob rawDataBlob = container.GetBlockBlobReference(blobName);
                DataBlob dataEvaluating = new DataBlob(rawDataBlob, _Engine, _Search, _Log);

                // The blob has to be "touched" or its properties will all be null.
                if (dataEvaluating.AzureBlob.Exists() != true)
                {
                    throw new MissingRequiredObject($"\ndataEvaluating does not exist {dataEvaluating.AzureBlob.Name}");
                }

                // The file hash identifies the bound JSON blob and allows file version validation.
                string blobMd5 = _Engine.EnsureMd5(dataEvaluating);

                // Currently only works with public access set on the blob containers:
                // the plain blob URL is sent to the model, no SAS token is appended.
                string dataEvaluatingUrl = dataEvaluating.AzureBlob.Uri.ToString();

                // The blob is passed by URL, so the request body stays empty.
                var content = new MultipartFormDataContent();

                // Get environment variables used to construct the model request URL.
                string dataEvaluationServiceEndpoint = _Engine.GetEnvironmentVariable("DataEvaluationServiceEndpoint", _Log);
                string evaluationDataParameterName = _Engine.GetEnvironmentVariable("evaluationDataParameterName", _Log);
                string parameters = $"?{evaluationDataParameterName}={dataEvaluatingUrl}";
                string evaluateDataUrl = _Engine.ConstructModelRequestUrl(dataEvaluationServiceEndpoint, parameters);

                const int maxAttempts = 5;
                string responseString = "";
                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    // Make a request to the model service passing the file URL.
                    responseString = _Engine.GetHttpResponseString(evaluateDataUrl, content);

                    //*****TODO***** "iteration" is a hard coded word that is specific to a model and needs to be a generic interface concept where the model must respond with an explicit success.
                    if (responseString.Contains("iteration"))
                    {
                        _Log.LogInformation($"\nEvaluation response: {responseString}.");
                        break;
                    }

                    if (attempt == maxAttempts)
                    {
                        _Log.LogInformation($"\nEvaluation of {evaluateDataUrl} failed {maxAttempts} attempts with response: {responseString}");
                    }
                    else
                    {
                        // Back off before the next attempt; there is nothing to wait for after the last one.
                        await Task.Delay(1000);
                    }
                }

                double confidence = 0;
                JProperty responseProperty = new JProperty("Response", responseString);

                // An untrained model is treated as zero confidence; any other response must be JSON.
                if (responseString != "Model not trained.")
                {
                    // Deserialize the response JSON and read the confidence score (looked up once).
                    JObject analysisJson = JObject.Parse(responseString);
                    JToken confidenceToken;
                    string strConfidence;
                    try
                    {
                        confidenceToken = analysisJson.SelectToken(confidenceJsonPath);
                        strConfidence = (string)confidenceToken;
                    }
                    catch
                    {
                        throw (new MissingRequiredObject($"\nInvalid response string {responseString} generated from URL: {evaluateDataUrl}."));
                    }

                    if (strConfidence == null)
                    {
                        //*****TODO***** if this fails the file will sit in the pending evaluation state because the trigger will have processed the file but the file could not be processed.  Need to figure out how to tell if a file failed processing so that we can reprocess the file at a later time.
                        throw (new MissingRequiredObject($"\nNo confidence value at {confidenceJsonPath} from environment variable ConfidenceJSONPath in response from model: {responseString}."));
                    }
                    confidence = (double)confidenceToken;
                }

                //----------------------------This section collects information about the blob being analyzed and packages it in JSON that is then written to blob storage for later processing-----------------------------------

                _Log.LogInformation("\nStarting construction of json blob.");

                // Create the JSON object describing this evaluation pass.
                JProperty environmentProperty = _Engine.GetEnvironmentJson(_Log);
                JProperty evaluationPass = new JProperty("pass",
                                                         new JObject(
                                                             new JProperty("date", DateTime.Now),
                                                             environmentProperty,
                                                             new JProperty("request", evaluateDataUrl),
                                                             responseProperty
                                                             )
                                                         );

                // Note: all json files get written to the same container as they are all accessed either by discrete name or by azure search index either GUID or Hash.
                // The same hash is used for the existence check, the update and the create path.
                string jsonBlobName = _Engine.GetEncodedHashFileName(blobMd5);
                CloudBlobContainer jsonContainer = blobClient.GetContainerReference(jsonStorageContainerName);
                CloudBlockBlob rawJsonBlob = jsonContainer.GetBlockBlobReference(jsonBlobName);

                if (rawJsonBlob.Exists())
                {
                    // The JSON blob already exists: append the latest pass to its history.
                    JsonBlob jsonBlob = new JsonBlob(blobMd5, _Engine, _Search, _Log);
                    JObject jsonBlobJObject = JObject.Parse(jsonBlob.AzureBlob.DownloadText());

                    JArray evaluationHistory = (JArray)jsonBlobJObject.SelectToken("Passes");
                    AddEvaluationPass(evaluationPass, evaluationHistory);

                    await _Engine.UploadJsonBlob(jsonBlob.AzureBlob, jsonBlobJObject);
                }
                else
                {
                    // No JSON blob yet: create one holding the initial state and this pass.
                    JObject blobAnalysis =
                        new JObject(
                            new JProperty("Id", Guid.NewGuid().ToString()),
                            new JProperty("IsDeleted", false),
                            new JProperty("Name", blobName),
                            new JProperty("Hash", blobMd5)
                            );

                    // Add state history information to the JSON blob.
                    JArray stateChanges = new JArray();
                    AddStateChange(pendingEvaluationStorageContainerName, stateChanges);
                    blobAnalysis.Add(new JProperty("StateHistory", stateChanges));

                    // Add pass information to the JSON blob.
                    JArray evaluations = new JArray();
                    AddEvaluationPass(evaluationPass, evaluations);
                    blobAnalysis.Add(new JProperty("Passes", evaluations));

                    CloudBlockBlob jsonCloudBlob = _Search.GetBlob(storageAccount, jsonStorageContainerName, jsonBlobName);
                    jsonCloudBlob.Properties.ContentType = "application/json";

                    await _Engine.UploadJsonBlob(jsonCloudBlob, blobAnalysis);
                }

                //--------------------------------This section processes the results of the analysis and transfers the blob to the container responsible for the next appropriate stage of processing.-------------------------------

                // Both helpers are awaited so that their failures reach the catch blocks below and
                // the success message is only logged after the blob has actually been routed.
                if (confidence >= confidenceThreshold)
                {
                    // Model successfully analyzed the content.
                    await EvaluationPassed(modelVerificationPercent, modelValidationStorageContainerName, evaluatedDataStorageContainerName, storageAccount, dataEvaluating);
                }
                else
                {
                    // Model was not sufficiently confident in its analysis.
                    await EvaluationFailed(blobName, pendingSupervisionStorageContainerName, storageAccount, dataEvaluating);
                }

                _Log.LogInformation($"C# Blob trigger function Processed blob\n Name:{blobName}");
            }
            catch (MissingRequiredObject e)
            {
                _Log.LogInformation($"\n{blobName} could not be analyzed because of a MissingRequiredObject with message: {e.Message}");
            }
            catch (Exception e)
            {
                _Log.LogInformation($"\n{blobName} could not be analyzed with message: {e.Message}");
            }
            return($"Evaluate data completed evaluating data blob: {blobName}");
        }
Beispiel #10
0
        /// <summary>
        /// Sends every block blob in the labeled-data container, together with the labels from
        /// its bound JSON blob, to the model's add-labeled-data endpoint.
        /// </summary>
        /// <returns>A completion message that includes the result of the last training-tags load.</returns>
        /// <exception cref="MissingRequiredObject">The model endpoint returned a null or empty response for a blob; processing of the remaining blobs stops.</exception>
        public async Task <string> AddLabeledData()
        {
            CloudStorageAccount storageAccount = _Engine.StorageAccount;
            CloudBlobClient     blobClient     = storageAccount.CreateCloudBlobClient();
            string             labeledDataStorageContainerName = _Engine.GetEnvironmentVariable("labeledDataStorageContainerName");
            CloudBlobContainer labeledDataContainer            = blobClient.GetContainerReference(labeledDataStorageContainerName);
            string             loadTrainingTagsResult          = null;

            foreach (IListBlobItem item in labeledDataContainer.ListBlobs(null, false))
            {
                // Only block blobs are processed; any other item type in the listing is skipped.
                if (item.GetType() != typeof(CloudBlockBlob))
                {
                    continue;
                }

                CloudBlockBlob dataCloudBlockBlob = (CloudBlockBlob)item;

                // Read the stored hash without calling ToString() on it: ContentMD5 may be null,
                // and dereferencing it would throw before the fallback below could run.
                string bindingHash = dataCloudBlockBlob.Properties.ContentMD5;
                if (bindingHash == null)
                {
                    // Compute the file hash; it binds the data blob to its JSON blob.
                    bindingHash = new DataBlob(dataCloudBlockBlob, _Engine, _Search, _Log).CalculateMD5Hash().ToString();
                    if (bindingHash == null)
                    {
                        _Log.LogInformation("\nWarning: Blob Hash calculation failed and will not be included in file information blob, continuing operation.");
                    }
                    else
                    {
                        dataCloudBlockBlob.Properties.ContentMD5 = bindingHash;
                    }
                }

                // Get a URL with a SAS token for the current data blob.
                string dataEvaluatingUrl = _Engine.GetBlobSasTokenForServiceAccess(dataCloudBlockBlob);

                // Instantiate the bound JSON blob, making the labels for the data available to send to the model.
                JsonBlob boundJson = new JsonBlob(bindingHash, _Engine, _Search);

                string evaluationDataParameterName = _Engine.GetEnvironmentVariable("evaluationDataParameterName");

                // NOTE(review): the original comment says the label set is sent under the name configured in
                // "labelingTagsParameterName", but the request below uses the fixed name "DataLabels" - confirm
                // which one the model expects. The value is still read here as before.
                _Engine.GetEnvironmentVariable("labelingTagsParameterName");

                // Construct the model URL. If the model needs other values from this application's context they are
                // referenced in the endpoint environment variable as {{variable name}}, for example:
                // https://branddetectionapp.azurewebsites.net/api/AddLabeledData/?projectID={{ProjectID}}
                string addLabeledDataServiceEndpoint = _Engine.GetEnvironmentVariable("LabeledDataServiceEndpoint");
                string addLabeledDataUrl             = _Engine.ConstructModelRequestUrl(addLabeledDataServiceEndpoint, "");

                // Load the list of valid training tags to ensure all data labels are valid.
                // NOTE(review): this runs once per blob; hoist it if LoadTrainingTags is loop-invariant.
                loadTrainingTagsResult = LoadTrainingTags();

                //*****TODO***** move to an async architecture
                //*****TODO***** decide whether sending the data as a binary stream in the post should be a configurable alternative to passing the blob URL.

                // The multipart content owns the parts added to it, so disposing it releases them as well.
                using (MultipartFormDataContent labeledDataContent = new MultipartFormDataContent())
                {
                    // Format the data labels content.
                    HttpContent dataLabelsStringContent = new StringContent(boundJson.Labels, Encoding.UTF8, "application/x-www-form-urlencoded");
                    labeledDataContent.Add(dataLabelsStringContent, "DataLabels");
                    HttpContent dataUrlStringContent = new StringContent(dataEvaluatingUrl, Encoding.UTF8, "application/x-www-form-urlencoded");
                    labeledDataContent.Add(dataUrlStringContent, evaluationDataParameterName);

                    // Log the labels themselves; interpolating the HttpContent object only prints its type name.
                    _Log.LogInformation($"\n Getting response from add labeled data API using {boundJson.Labels}");

                    // Call the constructed model URL (not the raw endpoint template) and validate the response string.
                    _ResponseString = _Engine.GetHttpResponseString(addLabeledDataUrl, labeledDataContent);
                }

                if (string.IsNullOrEmpty(_ResponseString))
                {
                    throw (new MissingRequiredObject($"\nresponseString not generated from URL: {addLabeledDataUrl} using {boundJson.Name}.  Processing will stop for labeleddata blobs."));
                }

                _Log.LogInformation($"Completed call to add blob: {dataCloudBlockBlob.Name} with labels: {JsonConvert.SerializeObject(boundJson.Labels)} to model.  The response string was: {_ResponseString}.");
            }
            return($"\nCompleted execution of AddLabeledData.  Loading Training Tags results: {loadTrainingTagsResult}.  See logs for success/fail details.");
        }