static async Task Main(string[] args)
{
    try
    {
        var modelBuilder = new ModelTrainer(
            ModelHelpers.GetAssetsPath("data", "tags.tsv"),
            ModelHelpers.GetAssetsPath("images"),
            ModelHelpers.GetAssetsPath("model", "tensorflow_inception_graph.pb"),
            ModelHelpers.GetAssetsPath("model", "imageClassifier.zip"));
        await modelBuilder.BuildAndTrain();

        var modelEvaluator = new ModelEvaluator(
            ModelHelpers.GetAssetsPath("data", "tags.tsv"),
            ModelHelpers.GetAssetsPath("images"),
            ModelHelpers.GetAssetsPath("model", "imageClassifier.zip"));
        await modelEvaluator.Evaluate();
    }
    catch (Exception ex)
    {
        // Don't swallow failures silently; report them before exiting.
        Console.WriteLine($"Exception: {ex.Message}");
    }
}
static async Task Main(string[] args)
{
    var assetsPath = ModelHelpers.GetAssetsPath(@"..\..\..\assets");
    var tagsTsv = Path.Combine(assetsPath, "inputs", "data", "tags.tsv");
    var imagesFolder = Path.Combine(assetsPath, "inputs", "data");
    var imageClassifierZip = Path.Combine(assetsPath, "inputs", "imageClassifier.zip");

    try
    {
        var modelEvaluator = new ModelEvaluator(tagsTsv, imagesFolder, imageClassifierZip);
        modelEvaluator.EvaluateStaticApi();
    }
    catch (Exception ex)
    {
        ConsoleWriteException(ex.Message);
    }

    ConsolePressAnyKey();
}
static void Main(string[] args)
{
    var trainingDataLocation = @"Data/winequality_white_train.csv";
    var testDataLocation = @"Data/winequality_white_test.csv";
    var modelEvaluator = new ModelEvaluator();

    var perceptronBinaryModel = new ModelBuilder(trainingDataLocation, new AveragedPerceptronBinaryClassifier()).BuildAndTrain();
    var perceptronBinaryMetrics = modelEvaluator.Evaluate(perceptronBinaryModel, testDataLocation);
    PrintMetrics("Perceptron", perceptronBinaryMetrics);

    var fastForestBinaryModel = new ModelBuilder(trainingDataLocation, new FastForestBinaryClassifier()).BuildAndTrain();
    var fastForestBinaryMetrics = modelEvaluator.Evaluate(fastForestBinaryModel, testDataLocation);
    PrintMetrics("Fast Forest Binary", fastForestBinaryMetrics);

    var fastTreeBinaryModel = new ModelBuilder(trainingDataLocation, new FastTreeBinaryClassifier()).BuildAndTrain();
    var fastTreeBinaryMetrics = modelEvaluator.Evaluate(fastTreeBinaryModel, testDataLocation);
    PrintMetrics("Fast Tree Binary", fastTreeBinaryMetrics);

    var linearSvmModel = new ModelBuilder(trainingDataLocation, new LinearSvmBinaryClassifier()).BuildAndTrain();
    var linearSvmMetrics = modelEvaluator.Evaluate(linearSvmModel, testDataLocation);
    PrintMetrics("Linear SVM", linearSvmMetrics);

    var logisticRegressionModel = new ModelBuilder(trainingDataLocation, new LogisticRegressionBinaryClassifier()).BuildAndTrain();
    var logisticRegressionMetrics = modelEvaluator.Evaluate(logisticRegressionModel, testDataLocation);
    PrintMetrics("Logistic Regression Binary", logisticRegressionMetrics);

    var sdcabModel = new ModelBuilder(trainingDataLocation, new StochasticDualCoordinateAscentBinaryClassifier()).BuildAndTrain();
    var sdcabMetrics = modelEvaluator.Evaluate(sdcabModel, testDataLocation);
    PrintMetrics("Stochastic Dual Coordinate Ascent Binary", sdcabMetrics);

    VisualizeTenPredictionsForTheModel(fastForestBinaryModel, testDataLocation);

    Console.ReadLine();
}
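The snippet above leans on a PrintMetrics helper that is not shown. A minimal sketch, assuming the evaluator returns the legacy ML.NET BinaryClassificationMetrics type (the Accuracy, Auc, and F1Score property names come from that API; treat the whole helper as an assumption):

private static void PrintMetrics(string modelName, BinaryClassificationMetrics metrics)
{
    // Prints the headline binary-classification metrics for one model.
    Console.WriteLine($"*** {modelName} ***");
    Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"AUC:      {metrics.Auc:P2}");
    Console.WriteLine($"F1 Score: {metrics.F1Score:P2}");
    Console.WriteLine();
}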
static async Task Main(string[] args)
{
    // Running inside Visual Studio, $SolutionDir/assets is automatically passed as an argument.
    // If you execute from the console, pass the location of the assets folder as an argument.
    // Otherwise, it will search for assets in the executable's folder.
    var assetsPath = args.Length > 0 ? args[0] : ModelHelpers.GetAssetsPath();
    var tagsTsv = Path.Combine(assetsPath, "inputs", "data", "tags.tsv");
    var imagesFolder = Path.Combine(assetsPath, "inputs", "data");
    var imageClassifierZip = Path.Combine(assetsPath, "outputs", "imageClassifier.zip");

    try
    {
        var modelEvaluator = new ModelEvaluator(tagsTsv, imagesFolder, imageClassifierZip);
        await modelEvaluator.Evaluate();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Exception: {ex.Message}");
    }

    Console.ReadKey();
}
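Several of these samples call ModelHelpers.GetAssetsPath, whose implementation is not shown. Here is a minimal sketch of the fallback behavior the comment describes (resolving relative to the executable's folder); the body is an assumption, not the project's actual helper:

using System.IO;
using System.Linq;

public static class ModelHelpers
{
    // Resolves a path relative to the folder containing this assembly,
    // normalizing any ".." segments (e.g. the @"..\..\..\assets" call earlier).
    public static string GetAssetsPath(params string[] paths)
    {
        var assemblyFolder = Path.GetDirectoryName(typeof(ModelHelpers).Assembly.Location);
        return Path.GetFullPath(Path.Combine(new[] { assemblyFolder }.Concat(paths).ToArray()));
    }
}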
static async Task Main(string[] args)
{
    // Running inside Visual Studio, $SolutionDir/assets is automatically passed as an argument.
    // If you execute from the console, pass the location of the assets folder as an argument.
    // Otherwise, it will search for assets in the executable's folder.
    var assetsPath = args.Length > 0 ? args[0] : ModelHelpers.GetAssetsPath();
    var transactionsCsv = Path.Combine(assetsPath, "inputs", "transactions.csv");
    var offersCsv = Path.Combine(assetsPath, "inputs", "offers.csv");
    var modelZip = Path.Combine(assetsPath, "outputs", "retailClustering.zip");
    var plotSvg = Path.Combine(assetsPath, "outputs", "customerSegmentation.svg");

    try
    {
        var modelEvaluator = new ModelEvaluator(transactionsCsv, offersCsv, modelZip, plotSvg);
        await modelEvaluator.Evaluate();
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Exception: {ex.Message}");
    }

    Console.ReadKey();
}
static async Task Main(string[] args)
{
    try
    {
        // typeof(...) is never null at runtime; these references mainly force the
        // TensorFlow and ImageAnalytics assemblies to load before training starts.
        if (typeof(TensorFlowTransform) == null)
        {
            throw new Exception("Tensorflow not loaded correctly");
        }
        if (typeof(ImageLoaderTransform) == null)
        {
            throw new Exception("ImageAnalytics not loaded correctly");
        }

        var modelBuilder = new ModelTrainer(
            ModelHelpers.GetAssetsPath("data", "tags.tsv"),
            ModelHelpers.GetAssetsPath("images"),
            ModelHelpers.GetAssetsPath("model", "tensorflow_inception_graph.pb"),
            ModelHelpers.GetAssetsPath("model", "imageClassifier.zip"));
        await modelBuilder.BuildAndTrain();

        var modelEvaluator = new ModelEvaluator(
            ModelHelpers.GetAssetsPath("data", "tags.tsv"),
            ModelHelpers.GetAssetsPath("images"),
            ModelHelpers.GetAssetsPath("model", "imageClassifier.zip"));
        await modelEvaluator.Evaluate();
    }
    catch (Exception ex)
    {
        // InnerException can be null, so fall back to the exception itself.
        Console.WriteLine("InnerException: {0}", ex.InnerException?.ToString() ?? ex.ToString());
        throw;
    }

    Console.WriteLine("End of process");
    Console.ReadKey();
}
/// <summary>
/// Trains a model using the input files
/// </summary>
/// <param name="settings">The trainer settings</param>
/// <param name="workFolderPath">A temp work folder for storing intermediate files</param>
/// <param name="usageFolderPath">The path to the folder of usage files</param>
/// <param name="catalogFilePath">The path to the catalog file</param>
/// <param name="evaluationFolderPath">The path to the evaluation file (optional)</param>
/// <param name="cancellationToken">A cancellation token used to abort the training</param>
private ModelTrainResult TrainModelInternal(IModelTrainerSettings settings, string workFolderPath,
    string usageFolderPath, string catalogFilePath, string evaluationFolderPath, CancellationToken cancellationToken)
{
    var duration = ModelTraininigDuration.Start();
    var result = new ModelTrainResult { Duration = duration };

    var userIdsIndexMap = new ConcurrentDictionary<string, uint>();
    var itemIdsIndexMap = new ConcurrentDictionary<string, uint>();

    // parse the catalog file
    IList<SarCatalogItem> catalogItems = null;
    string[] catalogFeatureNames = null;
    if (!string.IsNullOrWhiteSpace(catalogFilePath) && File.Exists(catalogFilePath))
    {
        // report progress
        _progressMessageReportDelegate("Parsing Catalog File");

        // create a catalog file parser
        var catalogParser = new CatalogFileParser(MaximumParsingErrorsCount, itemIdsIndexMap, _tracer);

        // parse the catalog file
        result.CatalogFilesParsingReport = catalogParser.ParseCatalogFile(catalogFilePath, cancellationToken, out catalogItems, out catalogFeatureNames);

        // record the catalog parsing duration
        duration.SetCatalogParsingDuration();
        _tracer.TraceInformation($"Catalog parsing completed in {duration.CatalogParsingDuration.TotalMinutes} minutes");

        // get the catalog items count
        result.CatalogItemsCount = catalogItems.Count;

        // fail the training if parsing had failed or yielded no items
        if (!result.CatalogFilesParsingReport.IsCompletedSuccessfuly || !catalogItems.Any())
        {
            result.CompletionMessage = "Failed to parse catalog file or parsing found no valid items";
            _tracer.TraceInformation(result.CompletionMessage);
            return result;
        }

        // clear the catalog items list if it's not used anymore
        if (!settings.EnableColdItemPlacement)
        {
            catalogItems.Clear();
        }
    }

    // report progress
    _progressMessageReportDelegate("Parsing Usage Events Files");

    // create a usage events files parser that skips events of unknown item ids (if a catalog was provided)
    var usageEventsParser = new UsageEventsFilesParser(itemIdsIndexMap, userIdsIndexMap,
        MaximumParsingErrorsCount, catalogItems != null, _tracer);

    _tracer.TraceInformation("Parsing the usage event files");
    IList<SarUsageEvent> usageEvents;
    result.UsageFilesParsingReport = usageEventsParser.ParseUsageEventFiles(usageFolderPath, cancellationToken, out usageEvents);

    // record the usage files parsing duration
    duration.SetUsageFilesParsingDuration();
    _tracer.TraceInformation($"Usage file(s) parsing completed in {duration.UsageFilesParsingDuration.TotalMinutes} minutes");

    // fail the training if parsing had failed or yielded no events
    if (!result.UsageFilesParsingReport.IsCompletedSuccessfuly || !usageEvents.Any())
    {
        result.CompletionMessage = "Failed to parse usage file(s) or parsing found no valid items";
        _tracer.TraceInformation(result.CompletionMessage);
        return result;
    }

    _tracer.TraceInformation($"Found {userIdsIndexMap.Count} unique users");
    result.UniqueUsersCount = userIdsIndexMap.Count;

    _tracer.TraceInformation($"Found {itemIdsIndexMap.Count} unique items");
    result.UniqueItemsCount = usageEvents.Select(x => x.ItemId).Distinct().Count();

    _tracer.TraceInformation("Extracting the indexed item ids from the item index map");
    string[] itemIdsIndex = itemIdsIndexMap.OrderBy(kvp => kvp.Value).Select(kvp => kvp.Key).ToArray();

    _tracer.TraceInformation($"Sorting the usage events based on the co-occurrence unit ({settings.CooccurrenceUnit})");
    switch (settings.CooccurrenceUnit)
    {
        case CooccurrenceUnit.User:
            usageEvents = usageEvents.OrderBy(x => x.UserId).ToArray();
            break;
        case CooccurrenceUnit.Timestamp:
            usageEvents = usageEvents.OrderBy(x => x.Timestamp).ThenBy(x => x.UserId).ToArray();
            break;
    }

    _tracer.TraceInformation("Finished sorting usage events.");

    Stopwatch storeUserHistoryDuration = null;
    Task storeUserHistoryTask = null;
    if (settings.EnableUserToItemRecommendations && _userHistoryStore != null)
    {
        storeUserHistoryDuration = Stopwatch.StartNew();
        _tracer.TraceInformation($"Extracting the indexed user ids from the user index map ({userIdsIndexMap.Count:N} users)");
        string[] userIdsIndex = userIdsIndexMap.OrderBy(kvp => kvp.Value).Select(kvp => kvp.Key).ToArray();

        _tracer.TraceInformation($"Asynchronously starting to store usage events per user (total of {usageEvents.Count:N} items)");
        storeUserHistoryTask = Task.Run(
            () => _userHistoryStore.StoreUserHistoryEventsAsync(usageEvents, userIdsIndex, cancellationToken),
            cancellationToken);
    }

    // if provided, parse the evaluation usage event files
    int evaluationUsageEventsCount = 0;
    string parsedEvaluationUsageEventsFilePath = null;
    if (!string.IsNullOrWhiteSpace(evaluationFolderPath) && Directory.Exists(evaluationFolderPath))
    {
        // report progress
        _progressMessageReportDelegate("Parsing Evaluation Usage Events Files");

        _tracer.TraceInformation("Parsing the evaluation usage event files");
        IList<SarUsageEvent> evaluationUsageEvents;
        result.EvaluationFilesParsingReport = usageEventsParser.ParseUsageEventFiles(evaluationFolderPath, cancellationToken, out evaluationUsageEvents);
        if (result.EvaluationFilesParsingReport.IsCompletedSuccessfuly)
        {
            // set the evaluation usage events count
            evaluationUsageEventsCount = evaluationUsageEvents.Count;

            _tracer.TraceInformation("Storing the parsed usage events for evaluation to reduce memory footprint");
            parsedEvaluationUsageEventsFilePath = Path.Combine(workFolderPath, Path.GetRandomFileName());
            File.WriteAllLines(parsedEvaluationUsageEventsFilePath, evaluationUsageEvents.Select(JsonConvert.SerializeObject));
        }
        else
        {
            _tracer.TraceWarning("Skipping model evaluation as it failed to parse evaluation usage files.");
        }

        // record the evaluation usage files parsing duration
        duration.SetEvaluationUsageFilesParsingDuration();
        _tracer.TraceInformation($"Evaluation usage file(s) parsing completed in {duration.EvaluationUsageFilesParsingDuration.TotalMinutes} minutes");
    }

    // clear the indices maps as they are no longer needed
    userIdsIndexMap.Clear();
    itemIdsIndexMap.Clear();

    cancellationToken.ThrowIfCancellationRequested();

    // report progress
    _progressMessageReportDelegate("Core Training");

    _tracer.TraceInformation("Training a new model using SAR trainer");
    IDictionary<string, double> catalogFeatureWeights;
    var sarTrainer = new SarTrainer(_tracer);
    IPredictorModel sarModel = sarTrainer.Train(settings, usageEvents, catalogItems, catalogFeatureNames,
        result.UniqueUsersCount, result.CatalogItemsCount ?? result.UniqueItemsCount,
        out catalogFeatureWeights, cancellationToken);

    _tracer.TraceInformation("SAR training was completed.");

    // create the trained model properties
    var modelProperties = new ModelProperties
    {
        IncludeHistory = settings.AllowSeedItemsInRecommendations,
        EnableUserAffinity = settings.EnableUserAffinity,
        IsUserToItemRecommendationsSupported = settings.EnableUserToItemRecommendations,
        Decay = TimeSpan.FromDays(settings.DecayPeriodInDays),
        ReferenceDate = usageEventsParser.MostRecentEventTimestamp,
        UniqueUsersCount = result.UniqueUsersCount,
    };

    // create the trained model
    result.Model = new TrainedModel(sarModel, modelProperties, itemIdsIndex);

    // set the catalog feature weights
    result.CatalogFeatureWeights = catalogFeatureWeights;

    // record the core training duration
    duration.SetTrainingDuration();

    // run model evaluation if evaluation usage events are available
    if (evaluationUsageEventsCount > 0 && parsedEvaluationUsageEventsFilePath != null)
    {
        // report progress
        _progressMessageReportDelegate("Evaluating Trained Model");

        var evaluationUsageEvents = new List<SarUsageEvent>(evaluationUsageEventsCount);

        // load the evaluation usage events
        using (var reader = new StreamReader(parsedEvaluationUsageEventsFilePath))
        {
            while (!reader.EndOfStream)
            {
                evaluationUsageEvents.Add(JsonConvert.DeserializeObject<SarUsageEvent>(reader.ReadLine()));
            }
        }

        _tracer.TraceInformation("Starting model evaluation");
        var evaluator = new ModelEvaluator(_tracer);
        result.ModelMetrics = evaluator.Evaluate(result.Model, usageEvents, evaluationUsageEvents, cancellationToken);

        // record the evaluation duration
        duration.SetEvaluationDuration();
    }

    if (storeUserHistoryTask != null)
    {
        _tracer.TraceInformation("Waiting for storing of usage events per user (user history) to complete");
        if (!storeUserHistoryTask.IsCompleted)
        {
            _progressMessageReportDelegate("Storing User History");

            // set the reporting flag to true so usage history upload progress will get reported to model status
            _reportUserHistoryProgress = true;
        }

        try
        {
            storeUserHistoryTask.Wait(cancellationToken);
            storeUserHistoryDuration?.Stop();
            duration.StoringUserHistoryDuration = storeUserHistoryDuration?.Elapsed;
            _tracer.TraceInformation(
                $"Storing usage events per user (user history) completed after {duration.StoringUserHistoryDuration.Value.TotalMinutes} minutes");
        }
        catch (AggregateException ex)
        {
            var exception = new Exception("Exception while trying to store user history", ex);
            _tracer.TraceError(exception.ToString());
            throw exception;
        }
    }

    // stop measuring the duration and record the total duration
    duration.Stop();

    // return the train result
    result.CompletionMessage = "Model Training Completed Successfully";
    return result;
}
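For reference, this is roughly the shape of the ModelTrainResult object the method populates. The property list is inferred from its usage above, and every type name not shown in the method itself (ParsingReport, ModelMetrics) is an assumption:

public class ModelTrainResult
{
    // Inferred from usage in TrainModelInternal; types are assumptions.
    public ModelTraininigDuration Duration { get; set; }
    public string CompletionMessage { get; set; }
    public ParsingReport CatalogFilesParsingReport { get; set; }
    public ParsingReport UsageFilesParsingReport { get; set; }
    public ParsingReport EvaluationFilesParsingReport { get; set; }
    public int? CatalogItemsCount { get; set; } // nullable: only set when a catalog file is parsed
    public int UniqueUsersCount { get; set; }
    public int UniqueItemsCount { get; set; }
    public TrainedModel Model { get; set; }
    public IDictionary<string, double> CatalogFeatureWeights { get; set; }
    public ModelMetrics ModelMetrics { get; set; }
}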
static void Main(string[] args)
{
    try
    {
        ArgCollection argCollection = ArgCollection.GetInstance(args);
        if (argCollection.ExtractOptionalFlag("help"))
        {
            Console.WriteLine("");
            Console.WriteLine(UsageMessage);
            Console.WriteLine(HelpMessage);
            return;
        }

        string optimizerName = argCollection.ExtractOptional<string>("optimizer", "BrentThenGrid");
        string keepTestName = argCollection.ExtractOptional<string>("keepTest", "AlwaysKeep");
        string skipRowIndexFileNameOrNull = argCollection.ExtractOptional<string>("skipRowIndexFile", null);
        argCollection.CheckNoMoreOptions();

        string treeFileName = argCollection.ExtractNext<string>("treeFile");
        string predictorFileName = argCollection.ExtractNext<string>("predictorFile");
        string targetFileName = argCollection.ExtractNext<string>("targetFile");
        string leafDistributionName = argCollection.ExtractNext<string>("leafDistribution");
        string nullDataGeneratorName = argCollection.ExtractNext<string>("nullDataGenerator");
        string niceName = argCollection.ExtractNext<string>("niceName");
        string outputDirectory = argCollection.ExtractNext<string>("outputDirectory");
        RangeCollection pieceIndexRangeCollection = argCollection.ExtractNext<RangeCollection>("pieceIndexRange");
        int pieceCount = argCollection.ExtractNext<int>("pieceCount");
        RangeCollection nullIndexRangeCollection = argCollection.ExtractNext<RangeCollection>("nullIndexRange");
        argCollection.CheckThatEmpty();

        if (!PhyloDDriver.ValidateDistribution(leafDistributionName))
        {
            Console.WriteLine("{0} is not a recognized distribution name. Please choose a name from the following list:", leafDistributionName);
            foreach (string name in PhyloDDriver.GetDistributionNames())
            {
                Console.WriteLine("\t{0}", name);
            }
            throw new ArgumentException("Invalid distribution name.");
        }

        RangeCollection skipRowIndexRangeCollectionOrNull =
            (null == skipRowIndexFileNameOrNull) || skipRowIndexFileNameOrNull == "null"
                ? null
                : RangeCollection.Parse(File.ReadAllText(skipRowIndexFileNameOrNull));
        KeepTest<Dictionary<string, string>> keepTest = KeepTest<Dictionary<string, string>>.GetInstance(null, keepTestName);
        SpecialFunctions.CheckCondition(pieceIndexRangeCollection.IsBetween(0, pieceCount - 1), "pieceIndex must be at least 0 and less than pieceCount");
        SpecialFunctions.CheckCondition(nullIndexRangeCollection.IsBetween(-1, int.MaxValue), "nullIndex must be at least -1");

        PhyloTree aPhyloTree = PhyloTree.GetInstance(treeFileName, null);
        ModelScorer modelScorer = ModelScorer.GetInstance(aPhyloTree, leafDistributionName, optimizerName);
        ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(leafDistributionName, modelScorer);
        PhyloDDriver driver = PhyloDDriver.GetInstance();

        driver.Run(
            modelEvaluator,
            predictorFileName, targetFileName,
            leafDistributionName, nullDataGeneratorName,
            keepTest, skipRowIndexRangeCollectionOrNull,
            niceName,
            outputDirectory,
            pieceIndexRangeCollection, pieceCount,
            nullIndexRangeCollection,
            optimizerName);

        //Console.Write("Press enter to exit.");
        //Console.Read();
    }
    catch (Exception exception)
    {
        Console.WriteLine("");
        Console.WriteLine(exception.Message);
        if (exception.InnerException != null)
        {
            Console.WriteLine(exception.InnerException.Message);
        }
        Console.WriteLine("");
        Console.WriteLine(UsageMessage);
        throw;
    }
}
/// <summary>
/// Does the work.
/// </summary>
public override void DoWork()
{
    // get our input data and null the field to make sure we don't serialize it back
    InputData inputData = mInputData;
    mInputData = null;

    // get the job-specific names of input files
    FileDefCollection fileDefs = Job.FileDefs;
    string treeFileName = Utility.GetNamedFileDef(fileDefs, Constants.TreeFileDefName).LocalName;
    string predictorFileName = Utility.GetNamedFileDef(fileDefs, Constants.PredictorFileDefName).LocalName;
    string targetFileName = Utility.GetNamedFileDef(fileDefs, Constants.TargetFileDefName).LocalName;
    string skipRowIndexFileName = Utility.GetNamedFileDef(fileDefs, Constants.SkipRowIndexFileDefName).LocalName;

    // construct RangeCollections
    RangeCollection pieceIndexRangeCollection = RangeCollection.Parse(inputData.PieceIndexRange);
    RangeCollection nullIndexRangeCollection = RangeCollection.Parse(inputData.NullIndexRange);
    RangeCollection skipRowIndexRangeCollection;
    FileInfo fileInfo = new FileInfo(skipRowIndexFileName);
    if (fileInfo.Length > 0)
    {
        skipRowIndexRangeCollection = RangeCollection.Parse(File.ReadAllText(skipRowIndexFileName));
    }
    else
    {
        skipRowIndexRangeCollection = null;
    }

    // do the rest
    PhyloTree aPhyloTree = PhyloTree.GetInstance(treeFileName, null);
    ModelScorer modelScorer = ModelScorer.GetInstance(aPhyloTree, inputData.LeafDistributionName, inputData.OptimizerName);
    ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(inputData.LeafDistributionName, modelScorer);
    KeepTest<Dictionary<string, string>> keepTest = KeepTest<Dictionary<string, string>>.GetInstance(null, inputData.KeepTestName);
    PhyloDDriver driver = PhyloDDriver.GetInstance();

    // create a name for the temporary job sandbox. This directory gets created by driver.Run(...)
    string agentOutputDirectoryName = Path.Combine(Environment.CurrentDirectory,
        String.Format(CultureInfo.InvariantCulture, "{0}.{1}", Job.JobId, Task.TaskId));

    // save the standard out and standard error in memory streams
    using (MemoryStream streamOut = new MemoryStream(), streamError = new MemoryStream())
    {
        try
        {
            // redirect the outputs
            using (StreamWriter writerOut = new StreamWriter(streamOut),
                   writerError = new StreamWriter(streamError))
            {
                Console.SetOut(writerOut);
                Console.SetError(writerError);

                try
                {
                    // run the model
                    string outputFileName = driver.Run(
                        modelEvaluator,
                        predictorFileName, targetFileName,
                        inputData.LeafDistributionName, inputData.NullDataGeneratorName,
                        keepTest, skipRowIndexRangeCollection,
                        inputData.NiceName,
                        agentOutputDirectoryName,
                        pieceIndexRangeCollection, inputData.PieceCount,
                        nullIndexRangeCollection,
                        inputData.OptimizerName);

                    // this is the expected output file name -- save it so it can be written on the master side with the same name
                    mOutputFileName = Path.GetFileName(outputFileName);
                    mLocalOutputFileName = Path.Combine(inputData.LocalOutputDirectoryName, mOutputFileName);

                    // get the output data
                    string fullOutputPath = Path.Combine(agentOutputDirectoryName, mOutputFileName);
                    if (!File.Exists(fullOutputPath))
                    {
                        TaskResult.FailureReason = TaskFailureReason.MissingOutput;
                        TaskResult.FailureMessage = String.Format(CultureInfo.CurrentCulture, "Cannot find output file '{0}'", fullOutputPath);
                        TaskResult.Status = TaskAssignmentStatus.Failed;
                    }
                    else
                    {
                        using (StreamReader outputData = new StreamReader(fullOutputPath))
                        {
                            mOutputData = outputData.ReadToEnd();
                        }
                    }
                }
                finally
                {
                    // this finally is to make sure we delete the folder
                    // get rid of the sandbox
                    Directory.Delete(agentOutputDirectoryName, true);
                }
            }
        }
        finally
        {
            // this finally is to make sure we capture the console output; the writers are
            // flushed when the inner using disposes them, and ToArray (unlike GetBuffer)
            // returns only the bytes actually written, without trailing buffer padding
            Encoding encoding = Encoding.Default;
            TaskResult.StandardOutput = encoding.GetString(streamOut.ToArray());
            TaskResult.StandardError = encoding.GetString(streamError.ToArray());
        }
    }
}