/// <summary>
/// Trains a model using SAR.
/// </summary>
/// <param name="settings">The training settings</param>
/// <param name="usageEvents">The usage events to use for training</param>
/// <param name="catalogItems">The catalog items to use for training; required only when cold item placement is enabled</param>
/// <param name="uniqueUsersCount">The number of users in the user id index file. Must be non-negative.</param>
/// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file. Must be non-negative.</param>
/// <param name="cancellationToken">A cancellation token</param>
/// <returns>The trained predictor model</returns>
/// <exception cref="ArgumentNullException">A required argument is null</exception>
/// <exception cref="ArgumentException">A count argument is negative</exception>
public IPredictorModel Train(ITrainingSettings settings,
    IList<SarUsageEvent> usageEvents,
    IList<SarCatalogItem> catalogItems,
    int uniqueUsersCount,
    int uniqueUsageItemsCount,
    CancellationToken cancellationToken)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    if (usageEvents == null)
    {
        throw new ArgumentNullException(nameof(usageEvents));
    }

    // the catalog is only mandatory when cold item placement is requested
    if (settings.EnableColdItemPlacement && catalogItems == null)
    {
        throw new ArgumentNullException(nameof(catalogItems));
    }

    if (uniqueUsersCount < 0)
    {
        // BUGFIX: the message used to say 'positive' although the check accepts zero
        var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    if (uniqueUsageItemsCount < 0)
    {
        // BUGFIX: the message used to say 'positive' although the check accepts zero
        var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    cancellationToken.ThrowIfCancellationRequested();

    using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
    {
        // reset weights detected by any previous run; the channel-message
        // listener populates this field during training
        _detectedFeatureWeights = null;
        try
        {
            environment.AddListener<ChannelMessage>(ChannelMessageListener);
            IHost environmentHost = environment.Register("SarHost");

            // bind the cancellation token to SAR cancellation
            using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
            {
                _tracer.TraceInformation("Starting training model using SAR");
                return TrainModel(environmentHost, settings, usageEvents, catalogItems,
                    uniqueUsersCount, uniqueUsageItemsCount);
            }
        }
        finally
        {
            // always detach the listener, even if training throws or is cancelled
            environment.RemoveListener<ChannelMessage>(ChannelMessageListener);
        }
    }
}
/// <summary>
/// Runs the schema-inference command over each sample dataset and compares
/// the produced output file against the checked-in baseline.
/// </summary>
public void InferSchemaCommandTest()
{
    var inputFiles = new[]
    {
        GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv")),
    };

    using (var environment = new TlcEnvironment())
    {
        var host = environment.Register("InferSchemaCommandTest", seed: 0, verbose: false);
        using (var channel = host.Start("InferSchemaCommandTest"))
        {
            for (int index = 0; index < inputFiles.Length; index++)
            {
                var resultFile = string.Format("dataset-infer-schema-result-{0:00}.txt", index);
                string resultPath = GetOutputPath(Path.Combine("..", "Common", "Inference"), resultFile);

                var commandArgs = new InferSchemaCommand.Arguments()
                {
                    DataFile = inputFiles[index],
                    OutputFile = resultPath,
                };
                new InferSchemaCommand(Env, commandArgs).Run();

                CheckEquality(Path.Combine("..", "Common", "Inference"), resultFile);
            }
        }
    }
    Done();
}
/// <summary>
/// The main method to invoke TLC, with some high level configuration options set.
/// </summary>
/// <param name="env">The environment used in this run of TLC, for the purpose of returning outputs.</param>
/// <param name="args">The command line arguments.</param>
/// <param name="alwaysPrintStacktrace">"Marked" exceptions are assumed to be sufficiently descriptive, so we
/// do not print stack traces for them to the console, and instead print these only to a log file.
/// However, throwing unmarked exceptions is considered a bug in TLC (even if due to bad user input),
/// so we always write their stack traces to the console. If set to true, this executable will also print
/// stack traces from the marked exceptions as well.</param>
/// <returns>0 on success; -1 for a recognized ("marked") failure or bad arguments; -2 for an unexpected failure.</returns>
internal static int MainCore(TlcEnvironment env, string args, bool alwaysPrintStacktrace)
{
    // REVIEW: How should extra dlls, tracking, etc be handled? Should the args objects for
    // all commands derive from a common base?
    var mainHost = env.Register("Main");
    using (var telemetryPipe = mainHost.StartPipe<TelemetryMessage>("TelemetryPipe"))
    using (var ch = mainHost.Start("Main"))
    {
        int result;
        try
        {
            if (!CmdParser.TryGetFirstToken(args, out string kind, out string settings))
            {
                telemetryPipe.Send(TelemetryMessage.CreateCommand("ArgumentParsingFailure", args));
                Usage();
                return -1;
            }

            var cmdDef = new SubComponent<ICommand, SignatureCommand>(kind, settings);
            if (!ComponentCatalog.TryCreateInstance(mainHost, out ICommand cmd, cmdDef))
            {
                // Telemetry: Log
                telemetryPipe.Send(TelemetryMessage.CreateCommand("UnknownCommand", settings));
                ch.Error("Unknown command: '{0}'", kind);
                Usage();
                return -1;
            }

            // Telemetry: Log the command and settings.
            telemetryPipe.Send(TelemetryMessage.CreateCommand(kind.ToUpperInvariant(), settings));
            cmd.Run();
            result = 0;
        }
        catch (Exception ex)
        {
            // Best-effort: dump the command line and full exception details to a
            // uniquely-named log file under <temp>/TLC so the failure can be reported.
            var dumpFileDir = Path.Combine(
                Path.GetTempPath(),
                "TLC");
            var dumpFilePath = Path.Combine(dumpFileDir,
                string.Format(CultureInfo.InvariantCulture, "Error_{0:yyyyMMdd_HHmmss}_{1}.log", DateTime.UtcNow, Guid.NewGuid()));
            bool isDumpSaved = false;
            try
            {
                Directory.CreateDirectory(dumpFileDir);
                // REVIEW: Should specify the encoding.
                using (var sw = new StreamWriter(new FileStream(dumpFilePath, FileMode.Create, FileAccess.Write)))
                {
                    sw.WriteLine("--- Command line args ---");
                    sw.WriteLine(args);
                    sw.WriteLine("--- Exception message ---");
                    PrintFullExceptionDetails(sw, ex);
                }
                isDumpSaved = true;
            }
            catch (Exception)
            {
                // Don't throw an exception if we failed to write to the dump file.
            }

            // Process exceptions that we understand: walk the inner-exception chain,
            // send each to telemetry, and surface every "marked" exception's message.
            int count = 0;
            for (var e = ex; e != null; e = e.InnerException)
            {
                // Telemetry: Log the exception
                telemetryPipe.Send(TelemetryMessage.CreateException(e));
                if (e.IsMarked())
                {
                    ch.Error(e.Sensitivity(), e.Message);
                    PrintExceptionData(ch, e, false);
                    count++;
                }
            }

            if (count == 0)
            {
                // Didn't recognize any of the exceptions.
                ch.Error(MessageSensitivity.None, "***** Unexpected failure. Please go to https://aka.ms/MLNetIssue and register the error details *****");
                if (isDumpSaved)
                {
                    ch.Error(MessageSensitivity.None, "***** Error log has been saved to '{0}', please register the error at https://aka.ms/MLNetIssue *****", dumpFilePath);
                }
            }
            else if (isDumpSaved)
            {
                ch.Error(MessageSensitivity.None, "Error log has been saved to '{0}'. please register the error at https://aka.ms/MLNetIssue", dumpFilePath);
            }

            if (count == 0 || alwaysPrintStacktrace)
            {
                ch.Error(MessageSensitivity.None, "===== Begin detailed dump =====");
                PrintFullExceptionDetails(ch, ex);
                ch.Error(MessageSensitivity.None, "====== End detailed dump =====");
            }

            // Return a negative result code so AEther recognizes this as a failure.
            // (A dead, empty 'finally { }' block that followed this catch was removed.)
            result = count > 0 ? -1 : -2;
        }
        telemetryPipe.Done();
        return result;
    }
}
/// <summary>
/// Infers column types and purposes for each sample dataset and compares the
/// resulting feature-inference JSON against the checked-in baseline.
/// </summary>
public void DatasetInferenceTest()
{
    // Use Path.Combine instead of hard-coded '\' separators so paths resolve on
    // non-Windows platforms too (consistent with InferSchemaCommandTest).
    var datasets = new[]
    {
        GetDataPath(Path.Combine("..", "UCI", "adult.train")),
        GetDataPath(Path.Combine("..", "UCI", "adult.test")),
        GetDataPath(Path.Combine("..", "UnitTest", "breast-cancer.txt")),
    };

    using (var env = new TlcEnvironment())
    {
        var h = env.Register("InferDatasetFeatures", seed: 0, verbose: false);
        using (var ch = h.Start("InferDatasetFeatures"))
        {
            for (int i = 0; i < datasets.Length; i++)
            {
                // sample the file and detect how it splits into columns
                var sample = TextFileSample.CreateFromFullFile(h, datasets[i]);
                var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);
                if (!splitResult.IsSuccess)
                {
                    throw ch.ExceptDecode("Couldn't detect separator.");
                }

                var typeInfResult = ColumnTypeInference.InferTextFileColumnTypes(Env, sample,
                    new ColumnTypeInference.Arguments
                    {
                        Separator = splitResult.Separator,
                        AllowSparse = splitResult.AllowSparse,
                        AllowQuote = splitResult.AllowQuote,
                        ColumnCount = splitResult.ColumnCount
                    });
                if (!typeInfResult.IsSuccess)
                {
                    // NOTE(review): this silently skips the remaining datasets and the
                    // final Done() call — presumably intentional, but worth confirming.
                    return;
                }

                ColumnGroupingInference.GroupingColumn[] columns = null;
                bool hasHeader = false;
                columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);

                // fixed id keeps the generated JSON stable across runs
                Guid id = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");
                var commandArgs = new DatasetFeatureInference.Arguments(
                    typeInfResult.Data,
                    columns.Select(col => new DatasetFeatureInference.Column(
                        col.SuggestedName, col.Purpose, col.ItemKind, col.ColumnRangeSelector)).ToArray(),
                    sample.FullFileSize,
                    sample.ApproximateRowCount,
                    false,
                    id,
                    true);

                string jsonString = DatasetFeatureInference.InferDatasetFeatures(env, commandArgs);

                var outFile = string.Format("dataset-inference-result-{0:00}.txt", i);
                string dataPath = GetOutputPath(Path.Combine("..", "Common", "Inference"), outFile);
                using (var sw = new StreamWriter(File.Create(dataPath)))
                    sw.WriteLine(jsonString);
                CheckEquality(Path.Combine("..", "Common", "Inference"), outFile);
            }
        }
    }
    Done();
}
/// <summary>
/// Trains a model using SAR.
/// </summary>
/// <param name="settings">The training settings</param>
/// <param name="usageEvents">The usage events to use for training</param>
/// <param name="catalogItems">The catalog items to use for training; required only when cold item placement is enabled</param>
/// <param name="featureNames">The names of the catalog items features, in the same order as the feature values in the catalog</param>
/// <param name="uniqueUsersCount">The number of users in the user id index file. Must be non-negative.</param>
/// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file. Must be non-negative.</param>
/// <param name="catalogFeatureWeights">The computed catalog items features weights (if relevant)</param>
/// <param name="cancellationToken">A cancellation token</param>
/// <returns>The trained predictor model</returns>
/// <exception cref="ArgumentNullException">A required argument is null</exception>
/// <exception cref="ArgumentException">A count argument is negative</exception>
public IPredictorModel Train(ITrainingSettings settings,
    IList<SarUsageEvent> usageEvents,
    IList<SarCatalogItem> catalogItems,
    string[] featureNames,
    int uniqueUsersCount,
    int uniqueUsageItemsCount,
    out IDictionary<string, double> catalogFeatureWeights,
    CancellationToken cancellationToken)
{
    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings));
    }

    if (usageEvents == null)
    {
        throw new ArgumentNullException(nameof(usageEvents));
    }

    // the catalog is only mandatory when cold item placement is requested
    if (settings.EnableColdItemPlacement && catalogItems == null)
    {
        throw new ArgumentNullException(nameof(catalogItems));
    }

    if (uniqueUsersCount < 0)
    {
        // BUGFIX: the message used to say 'positive' although the check accepts zero
        var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    if (uniqueUsageItemsCount < 0)
    {
        // BUGFIX: the message used to say 'positive' although the check accepts zero
        var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
        _tracer.TraceWarning(exception.ToString());
        throw exception;
    }

    cancellationToken.ThrowIfCancellationRequested();

    using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
    {
        // reset weights detected by any previous run; the channel-message
        // listener populates this field during training
        _detectedFeatureWeights = null;
        try
        {
            environment.AddListener<ChannelMessage>(ChannelMessageListener);
            IHost environmentHost = environment.Register("SarHost");

            // bind the cancellation token to SAR cancellation
            using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
            {
                _tracer.TraceInformation("Starting training model using SAR");
                IPredictorModel model = TrainModel(environmentHost, settings, usageEvents, catalogItems,
                    uniqueUsersCount, uniqueUsageItemsCount);

                // map the detected feature weights back to feature names, if both
                // are available and their lengths agree
                catalogFeatureWeights = new Dictionary<string, double>();
                if (_detectedFeatureWeights != null && featureNames != null)
                {
                    if (_detectedFeatureWeights.Length == featureNames.Length)
                    {
                        for (int i = 0; i < featureNames.Length; i++)
                        {
                            catalogFeatureWeights[featureNames[i]] = _detectedFeatureWeights[i];
                        }
                    }
                    else
                    {
                        _tracer.TraceWarning(
                            $"Found a mismatch between number of feature names ({featureNames.Length}) and the number of feature weights ({_detectedFeatureWeights.Length})");
                    }
                }

                return model;
            }
        }
        finally
        {
            // always detach the listener, even if training throws or is cancelled
            environment.RemoveListener<ChannelMessage>(ChannelMessageListener);
        }
    }
}
/// <summary>
/// Stress test: repeatedly builds a random tree of hosts while concurrently
/// cancelling some of them, then verifies that cancellation propagated to
/// every registered descendant of each cancelled host.
/// </summary>
public void TestCancellation()
{
    // FIX: dispose the environment — sibling code treats TlcEnvironment as IDisposable
    using (var env = new TlcEnvironment(seed: 42))
    {
        for (int z = 0; z < 1000; z++)
        {
            var mainHost = env.Register("Main");
            var children = new ConcurrentDictionary<IHost, List<IHost>>();
            var hosts = new BlockingCollection<Tuple<IHost, int>>();
            hosts.Add(new Tuple<IHost, int>(mainHost.Register("1"), 1));
            hosts.Add(new Tuple<IHost, int>(mainHost.Register("2"), 1));
            hosts.Add(new Tuple<IHost, int>(mainHost.Register("3"), 1));
            hosts.Add(new Tuple<IHost, int>(mainHost.Register("4"), 1));
            hosts.Add(new Tuple<IHost, int>(mainHost.Register("5"), 1));

            int iterations = 100;
            // NOTE(review): 'rand' is shared between this thread and addThread below;
            // System.Random is not thread-safe — confirm this is acceptable for the test.
            Random rand = new Random();
            var addThread = new Thread(
                () =>
                {
                    for (int i = 0; i < iterations; i++)
                    {
                        var randHostTuple = hosts.ElementAt(rand.Next(hosts.Count - 1));
                        var newHost = randHostTuple.Item1.Register((randHostTuple.Item2 + 1).ToString());
                        hosts.Add(new Tuple<IHost, int>(newHost, randHostTuple.Item2 + 1));

                        // BUGFIX: the original if/else created the child list but never
                        // added the first child to it, so those hosts were silently
                        // excluded from the cancellation verification below.
                        children.GetOrAdd(randHostTuple.Item1, _ => new List<IHost>()).Add(newHost);
                    }
                });
            addThread.Start();

            // cancel five hosts of depth >= 3 while the add thread is still running
            Queue<IHost> queue = new Queue<IHost>();
            for (int i = 0; i < 5; i++)
            {
                IHost rootHost = null;
                var index = 0;
                do
                {
                    index = rand.Next(hosts.Count);
                } while (hosts.ElementAt(index).Item1.IsCancelled || hosts.ElementAt(index).Item2 < 3);
                hosts.ElementAt(index).Item1.StopExecution();
                rootHost = hosts.ElementAt(index).Item1;
                queue.Enqueue(rootHost);
            }
            addThread.Join();

            // every descendant of a cancelled host must itself be cancelled
            while (queue.Count > 0)
            {
                var currentHost = queue.Dequeue();
                Assert.True(currentHost.IsCancelled);
                if (children.ContainsKey(currentHost))
                {
                    children[currentHost].ForEach(x => queue.Enqueue(x));
                }
            }
        }
    }
}