/// <summary>
/// Console entry point that runs one complete retraining cycle against an uploaded CSV
/// training set: it fills in the retrainer configuration from the application settings,
/// uploads the training file, queues and starts a retraining job, polls until the job
/// reports Finished, prints the new scores and then deploys the retrained model.
/// See https://github.com/jmarymee/convirga for graphics and details.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Pause between job-status polls so the service is not hit in a tight loop.
    const int pollIntervalMilliseconds = 1000;

    // First we create an object to store all of the config parameters.
    MLRetrainerLib.RetrainerLib.MLRetrainConfig configobj = new MLRetrainerLib.RetrainerLib.MLRetrainConfig();

    // Parameters for the retrainer experiment. These are found in Azure ML Studio. You must first
    // publish your training experiment as a retraining endpoint. Once created it will have at least
    // two output nodes: one for the newly trained model (the iLearner) and one for the
    // metrics/accuracy scores. You can then publish it as a web service endpoint and obtain the
    // two following values.
    configobj.mlretrainerurl = Properties.Settings.Default.mlretrainermodelurl;
    configobj.mlretrainerkey = Properties.Settings.Default.mlretrainerkey;

    // To get these you MUST go to the Azure Portal (currently the legacy one), open the ML workspace
    // and go to Web Services. Go to the predictive experiment endpoint (not the training one) and
    // create another endpoint, because you cannot deploy a new model to the initial, default endpoint.
    // The API key is shown on the lower right side (that is your endpoint key); Update Resource shows
    // the endpoint URL. The endpoint name can be found in the sample C# code. Copy it EXACTLY as it
    // appears there: a mismatch causes a failure when you programmatically deploy the retrained model.
    // Note: the settings property really is spelled "enpointurl".
    configobj.publishendpointurl = Properties.Settings.Default.enpointurl;
    configobj.publishendpointkey = Properties.Settings.Default.endpointkey;
    configobj.publishendpointname = Properties.Settings.Default.endpointname;
    configobj.publishendpoint2url = Properties.Settings.Default.endpoint2url;
    configobj.publishendpoint2key = Properties.Settings.Default.endpoint2key;

    // These are somewhat arbitrary. It is just a place to store the retrained models and the CSV
    // files with the scoring/accuracy data. Create this storage area manually in Azure Blobs and
    // then use its name, key and container name below.
    configobj.mlretrainerstoragename = Properties.Settings.Default.mlstoragename;
    configobj.mlretrainerstoragekey = Properties.Settings.Default.mlstoragekey;
    configobj.mlretrainercontainername = Properties.Settings.Default.mlstoragecontainer;

    // The constructor takes the single, pre-filled config object. On instantiation the library
    // attempts to load the scoring data from the last retraining run; if none exists it still
    // creates its internal Dictionary, with one entry indicating that there is no scoring data.
    MLRetrainerLib.RetrainerLib retrainer = new MLRetrainerLib.RetrainerLib(configobj);

    // You can change the run date of the SQL query in order to get a daily trained model.
    //retrainer.UpdateSQLQueryForNewDate("2015-10-01");

    // Upload the new training set.
    retrainer.UploadNewTrainingFileToStorage(@"C:\Users\jmarymee\Downloads\AIC-Dataset.csv");

    // Gets a blob handle to the retraining blob. Not necessary IF you uploaded a fresh set (the
    // library then already holds a reference to the blob), but required when the set was uploaded
    // to the configured blob storage earlier with another tool.
    retrainer.GetRetrainingBlob("AIC-Dataset.csv");

    // For display only: shows the results of the last model training so that they can be compared
    // with the new results after retraining.
    //Console.WriteLine("Results of last training: ");
    //foreach (var val in retrainer.lastScores)
    //{
    //    Console.WriteLine("Rating Name: {0} : Value: {1}", val.Key, val.Value.ToString());
    //}

    // Retraining a model takes two steps: queue up the job, then start the job.
    // STEP ONE: Queue up a retraining job. Be sure to save the jobID or else you won't be able to
    // start the job. A job that is not started is automatically deleted after a few minutes
    // (per the ML doc). The parameter dictionary is used to 'steer' the retraining, e.g.
    // algorithm parameters or SQL queries.
    Dictionary<string, string> gParms = new Dictionary<string, string>
    {
        { "Fraction of rows in the first output dataset", "0.5" }
    };
    string jobID = retrainer.QueueRetrainingAsync(MLRetrainerLib.RetrainerLib.TRAINING_DATA_SOURCE.DATA_UPLOAD, gParms).Result;

    // STEP TWO: Start the retraining job.
    retrainer.StartRetrainingJob(jobID).Wait();

    // STEP THREE: Wait for retraining completion.
    // Poll the job status until it reports Finished, sleeping between polls. You could also use
    // delegates or threads to wait if the main thread has other work to do.
    // NOTE(review): this loop never exits if the job ends in a non-Finished terminal state;
    // the other members of BatchScoreStatusCode are not visible here - confirm and add an exit.
    MLRetrainerLib.BatchScoreStatusCode status;
    while (true)
    {
        status = retrainer.CheckJobStatus(jobID).Result;
        Console.WriteLine(status.ToString());
        if (status == MLRetrainerLib.BatchScoreStatusCode.Finished)
        {
            break;
        }

        System.Threading.Thread.Sleep(pollIntervalMilliseconds);
    }

    // Now we look at the new (latest) results.
    // These are pulled from CSV files in the configured Azure Blob Storage.
    Console.WriteLine("New Scores for retraining...");
    Dictionary<string, double> scores = retrainer.GetLatestRetrainedResults();
    foreach (var val in scores)
    {
        Console.WriteLine("Rating Name: {0} : Value: {1}", val.Key, val.Value.ToString());
    }

    string resultOfRetrain = retrainer.GetLatestRetrainedResults(true);
    Console.WriteLine(resultOfRetrain);

    // STEP FOUR: Check to see if the new model is more accurate.
    // The intended check compares the current result with the last result on one metric (AUC here);
    // the second argument is the improvement the retrained model must show for the call to return true.
    // NOTE(review): the original comment speaks of a 20% improvement but the call passes 0.02f - confirm.
    bool isModelbetter = true; // Arbitrary for testing...
    //bool isModelbetter = retrainer.isUdpateModel("AUC", 0.02f);
    if (!isModelbetter)
    {
        Console.WriteLine("No need to update endpoint. Accuracy has not improved. Press a key to end");
        Console.ReadLine();
        return; // The model isn't more accurate, so terminate the app.
    }

    // STEP FIVE: Deploy the updated, retrained model to the published endpoint.
    // Announced only after the accuracy gate has passed. Use the same jobID that started the job.
    Console.WriteLine("Now deploying the new model to the published endpoint...");
    bool isUpdated = retrainer.UpdateModel(jobID).Result;
    if (isUpdated)
    {
        Console.WriteLine("Successful model retraining and endpoint deployment");
    }
    else
    {
        Console.WriteLine("Something went wrong updating the model");
    }

    Console.WriteLine("Process has completed. Press a key to end");
    Console.ReadLine();
}
/// <summary>
/// Console entry point that retrains a model from the cloud-hosted data source: it fills in
/// the retrainer configuration from the application settings, queues and starts a retraining
/// job with the parameters built below, and polls until the job reports Finished.
/// See https://github.com/jmarymee/azuremlmodelretrainer for graphics and details.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Pause between job-status polls so the service is not hit in a tight loop.
    const int pollIntervalMilliseconds = 1000;

    // First we create an object to store all of the config parameters.
    MLRetrainerLib.RetrainerLib.MLRetrainConfig configobj = new MLRetrainerLib.RetrainerLib.MLRetrainConfig();

    // Parameters for the retrainer experiment. These are found in Azure ML Studio. You must first
    // publish your training experiment as a retraining endpoint. Once created it will have at least
    // two output nodes: one for the newly trained model (the iLearner) and one for the
    // metrics/accuracy scores. You can then publish it as a web service endpoint and obtain the
    // two following values.
    configobj.mlretrainerurl = Properties.Settings.Default.mlretrainermodelurl;
    configobj.mlretrainerkey = Properties.Settings.Default.mlretrainerkey;

    // To get these you MUST go to the Azure Portal (currently the legacy one), open the ML workspace
    // and go to Web Services. Go to the predictive experiment endpoint (not the training one) and
    // create another endpoint, because you cannot deploy a new model to the initial, default endpoint.
    // The API key is shown on the lower right side (that is your endpoint key); Update Resource shows
    // the endpoint URL. The endpoint name can be found in the sample C# code. Copy it EXACTLY as it
    // appears there: a mismatch causes a failure when you programmatically deploy the retrained model.
    // Note: the settings property really is spelled "enpointurl".
    configobj.publishendpointurl = Properties.Settings.Default.enpointurl;
    configobj.publishendpointkey = Properties.Settings.Default.endpointkey;
    configobj.publishendpointname = Properties.Settings.Default.endpointname;

    // These are somewhat arbitrary. It is just a place to store the retrained models and the CSV
    // files with the scoring/accuracy data. Create this storage area manually in Azure Blobs and
    // then use its name, key and container name below.
    configobj.mlretrainerstoragename = Properties.Settings.Default.mlstoragename;
    configobj.mlretrainerstoragekey = Properties.Settings.Default.mlstoragekey;
    configobj.mlretrainercontainername = Properties.Settings.Default.mlstoragecontainer;

    // The constructor takes the single, pre-filled config object. On instantiation the library
    // attempts to load the scoring data from the last retraining run; if none exists it still
    // creates its internal Dictionary, with one entry indicating that there is no scoring data.
    MLRetrainerLib.RetrainerLib retrainer = new MLRetrainerLib.RetrainerLib(configobj);

    // Retraining a model takes two steps: queue up the job, then start the job.
    // STEP ONE: Queue up a retraining job. Be sure to save the jobID or else you won't be able to
    // start the job. A job that is not started is automatically deleted after a few minutes
    // (per the ML doc).
    Dictionary<string, string> gParms = new Dictionary<string, string>
    {
        { "Database query", "select * from TblVCCompanies" },
        { "Fraction of rows in the first output dataset", "0.7" },
        { "L1 regularization weight", "1" }
    };

    // Pass the parameters built above. Previously null was passed here, which left gParms unused
    // and silently dropped the query and the tuning values.
    //string jobID = retrainer.QueueRetrainingAsync(MLRetrainerLib.RetrainerLib.TRAINING_DATA_SOURCE.CLOUD_HOSTED, "select top(2000) * from TblVCCompanies").Result;
    string jobID = retrainer.QueueRetrainingAsync(MLRetrainerLib.RetrainerLib.TRAINING_DATA_SOURCE.CLOUD_HOSTED, gParms).Result;

    // STEP TWO: Start the retraining job.
    retrainer.StartRetrainingJob(jobID).Wait();

    // STEP THREE: Wait for retraining completion.
    // Poll the job status until it reports Finished, sleeping between polls. You could also use
    // delegates or threads to wait if the main thread has other work to do.
    // NOTE(review): this loop never exits if the job ends in a non-Finished terminal state;
    // the other members of BatchScoreStatusCode are not visible here - confirm and add an exit.
    MLRetrainerLib.BatchScoreStatusCode status;
    while (true)
    {
        status = retrainer.CheckJobStatus(jobID).Result;
        Console.WriteLine(status.ToString());
        if (status == MLRetrainerLib.BatchScoreStatusCode.Finished)
        {
            break;
        }

        System.Threading.Thread.Sleep(pollIntervalMilliseconds);
    }

    // Now we look at the new (latest) results.
    // These are pulled from CSV files in the configured Azure Blob Storage.
    // The display of the individual scores is currently disabled.
    Console.WriteLine("New Scores for retraining...");
    //Dictionary<string, double> scores = retrainer.GetLatestRetrainedResults();
    //foreach (var val in scores)
    //{
    //    Console.WriteLine("Rating Name: {0} : Value: {1}", val.Key, val.Value.ToString());
    //}
    //string resultOfRetrain = retrainer.GetLatestRetrainedResults(true);
    //Console.WriteLine(resultOfRetrain);
}