public void InferSchemaCommandTest()
{
    var datasets = new[]
    {
        GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))
    };

    using (var env = new ConsoleEnvironment())
    {
        var h = env.Register("InferSchemaCommandTest", seed: 0, verbose: false);
        using (var ch = h.Start("InferSchemaCommandTest"))
        {
            for (int i = 0; i < datasets.Length; i++)
            {
                var outFile = string.Format("dataset-infer-schema-result-{0:00}.txt", i);
                string dataPath = GetOutputPath(Path.Combine("..", "Common", "Inference"), outFile);
                var args = new InferSchemaCommand.Arguments()
                {
                    DataFile = datasets[i],
                    OutputFile = dataPath,
                };
                var cmd = new InferSchemaCommand(Env, args);
                cmd.Run();
                CheckEquality(Path.Combine("..", "Common", "Inference"), outFile);
            }
        }
    }
    Done();
}
/// <summary>
/// The main method to invoke TLC, with some high level configuration options set.
/// </summary>
/// <param name="env">The environment used in this run of TLC, for the purpose of returning outputs.</param>
/// <param name="args">The command line arguments.</param>
/// <param name="alwaysPrintStacktrace">"Marked" exceptions are assumed to be sufficiently descriptive, so we
/// do not print stack traces for them to the console, and instead print these only to a log file.
/// However, throwing unmarked exceptions is considered a bug in TLC (even if due to bad user input),
/// so we always write their stack traces to the console. If set to true though, this executable will also
/// print stack traces from the marked exceptions as well.</param>
/// <returns>0 if the command ran successfully; a negative error code otherwise.</returns>
internal static int MainCore(ConsoleEnvironment env, string args, bool alwaysPrintStacktrace)
{
    // REVIEW: How should extra dlls, tracking, etc be handled? Should the args objects for
    // all commands derive from a common base?
    var mainHost = env.Register("Main");
    using (var telemetryPipe = mainHost.StartPipe<TelemetryMessage>("TelemetryPipe"))
    using (var ch = mainHost.Start("Main"))
    {
        int result;
        try
        {
            if (!CmdParser.TryGetFirstToken(args, out string kind, out string settings))
            {
                telemetryPipe.Send(TelemetryMessage.CreateCommand("ArgumentParsingFailure", args));
                Usage();
                return -1;
            }

            if (!ComponentCatalog.TryCreateInstance<ICommand, SignatureCommand>(mainHost, out ICommand cmd, kind, settings))
            {
                // Telemetry: Log the unknown command.
                telemetryPipe.Send(TelemetryMessage.CreateCommand("UnknownCommand", settings));
                ch.Error("Unknown command: '{0}'", kind);
                Usage();
                return -1;
            }

            // Telemetry: Log the command and settings.
            telemetryPipe.Send(TelemetryMessage.CreateCommand(kind.ToUpperInvariant(), settings));

            cmd.Run();
            result = 0;
        }
        catch (Exception ex)
        {
            var dumpFileDir = Path.Combine(Path.GetTempPath(), "TLC");
            var dumpFilePath = Path.Combine(dumpFileDir,
                string.Format(CultureInfo.InvariantCulture, "Error_{0:yyyyMMdd_HHmmss}_{1}.log", DateTime.UtcNow, Guid.NewGuid()));
            bool isDumpSaved = false;
            try
            {
                Directory.CreateDirectory(dumpFileDir);
                // REVIEW: Should specify the encoding.
                using (var sw = new StreamWriter(new FileStream(dumpFilePath, FileMode.Create, FileAccess.Write)))
                {
                    sw.WriteLine("--- Command line args ---");
                    sw.WriteLine(args);
                    sw.WriteLine("--- Exception message ---");
                    PrintFullExceptionDetails(sw, ex);
                }

                isDumpSaved = true;
            }
            catch (Exception)
            {
                // Don't throw an exception if we failed to write to the dump file.
            }

            // Process exceptions that we understand.
            int count = 0;
            for (var e = ex; e != null; e = e.InnerException)
            {
                // Telemetry: Log the exception.
                telemetryPipe.Send(TelemetryMessage.CreateException(e));
                if (e.IsMarked())
                {
                    ch.Error(e.Sensitivity(), e.Message);
                    PrintExceptionData(ch, e, false);
                    count++;
                }
            }

            if (count == 0)
            {
                // Didn't recognize any of the exceptions.
                ch.Error(MessageSensitivity.None, "***** Unexpected failure. Please refer to https://aka.ms/MLNetIssue to file an issue with details *****");
                if (isDumpSaved)
                {
                    ch.Error(MessageSensitivity.None, "***** Error log has been saved to '{0}', please refer to https://aka.ms/MLNetIssue to file an issue with details *****", dumpFilePath);
                }
            }
            else if (isDumpSaved)
            {
                ch.Error(MessageSensitivity.None, "Error log has been saved to '{0}'. Please refer to https://aka.ms/MLNetIssue if you need assistance.", dumpFilePath);
            }

            if (count == 0 || alwaysPrintStacktrace)
            {
                ch.Error(MessageSensitivity.None, "===== Begin detailed dump =====");
                PrintFullExceptionDetails(ch, ex);
                ch.Error(MessageSensitivity.None, "===== End detailed dump =====");
            }

            // Return a negative result code so AEther recognizes this as a failure.
            result = count > 0 ? -1 : -2;
        }

        telemetryPipe.Done();
        return result;
    }
}
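// A minimal usage sketch, assuming a hypothetical entry point named MainSketch:
// it shows one way MainCore above might be driven from a console Main.
// Re-joining the argument array into a single string is an assumption for
// illustration; only ConsoleEnvironment and the MainCore signature come from
// the code above.
internal static int MainSketch(string[] args)
{
    // MainCore takes the raw command line as a single string.
    string commandLine = string.Join(" ", args);
    using (var env = new ConsoleEnvironment())
    {
        // alwaysPrintStacktrace: false keeps marked (user-facing) exceptions
        // free of stack traces on the console, as described in the doc comment.
        return MainCore(env, commandLine, alwaysPrintStacktrace: false);
    }
}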
public void DatasetInferenceTest()
{
    var datasets = new[]
    {
        GetDataPath(@"..\UCI\adult.train"),
        GetDataPath(@"..\UCI\adult.test"),
        GetDataPath(@"..\UnitTest\breast-cancer.txt"),
    };

    using (var env = new ConsoleEnvironment())
    {
        var h = env.Register("InferDatasetFeatures", seed: 0, verbose: false);
        using (var ch = h.Start("InferDatasetFeatures"))
        {
            for (int i = 0; i < datasets.Length; i++)
            {
                // Sample the file and detect the column separator.
                var sample = TextFileSample.CreateFromFullFile(h, datasets[i]);
                var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);
                if (!splitResult.IsSuccess)
                    throw ch.ExceptDecode("Couldn't detect separator.");

                // Infer the type of each column from the sampled rows.
                var typeInfResult = ColumnTypeInference.InferTextFileColumnTypes(Env, sample,
                    new ColumnTypeInference.Arguments
                    {
                        Separator = splitResult.Separator,
                        AllowSparse = splitResult.AllowSparse,
                        AllowQuote = splitResult.AllowQuote,
                        ColumnCount = splitResult.ColumnCount
                    });
                if (!typeInfResult.IsSuccess)
                    return;

                // Infer the purpose of each column (label, features, etc.).
                ColumnGroupingInference.GroupingColumn[] columns = null;
                bool hasHeader = false;
                columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);

                Guid id = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");
                var commandArgs = new DatasetFeatureInference.Arguments(typeInfResult.Data,
                    columns.Select(col => new DatasetFeatureInference.Column(
                        col.SuggestedName, col.Purpose, col.ItemKind, col.ColumnRangeSelector)).ToArray(),
                    sample.FullFileSize, sample.ApproximateRowCount, false, id, true);

                string jsonString = DatasetFeatureInference.InferDatasetFeatures(env, commandArgs);

                // Write the inference result and compare against the checked-in baseline.
                var outFile = string.Format("dataset-inference-result-{0:00}.txt", i);
                string dataPath = GetOutputPath(@"..\Common\Inference", outFile);
                using (var sw = new StreamWriter(File.Create(dataPath)))
                    sw.WriteLine(jsonString);

                CheckEquality(@"..\Common\Inference", outFile);
            }
        }
    }
    Done();
}
public void TestCancellation()
{
    var env = new ConsoleEnvironment(seed: 42);
    for (int z = 0; z < 1000; z++)
    {
        var mainHost = env.Register("Main");
        var children = new ConcurrentDictionary<IHost, List<IHost>>();
        var hosts = new BlockingCollection<Tuple<IHost, int>>();
        hosts.Add(new Tuple<IHost, int>(mainHost.Register("1"), 1));
        hosts.Add(new Tuple<IHost, int>(mainHost.Register("2"), 1));
        hosts.Add(new Tuple<IHost, int>(mainHost.Register("3"), 1));
        hosts.Add(new Tuple<IHost, int>(mainHost.Register("4"), 1));
        hosts.Add(new Tuple<IHost, int>(mainHost.Register("5"), 1));

        int iterations = 100;
        Random rand = new Random();

        // Concurrently register new child hosts under randomly chosen hosts,
        // tracking parent/child relationships so they can be verified later.
        var addThread = new Thread(
            () =>
            {
                for (int i = 0; i < iterations; i++)
                {
                    var randHostTuple = hosts.ElementAt(rand.Next(hosts.Count - 1));
                    var newHost = randHostTuple.Item1.Register((randHostTuple.Item2 + 1).ToString());
                    hosts.Add(new Tuple<IHost, int>(newHost, randHostTuple.Item2 + 1));
                    if (!children.ContainsKey(randHostTuple.Item1))
                        children[randHostTuple.Item1] = new List<IHost>();
                    children[randHostTuple.Item1].Add(newHost);
                }
            });
        addThread.Start();

        // Cancel five hosts that are nested at least three levels deep.
        Queue<IHost> queue = new Queue<IHost>();
        for (int i = 0; i < 5; i++)
        {
            IHost rootHost = null;
            var index = 0;
            do
            {
                index = rand.Next(hosts.Count);
            } while (hosts.ElementAt(index).Item1.IsCancelled || hosts.ElementAt(index).Item2 < 3);
            hosts.ElementAt(index).Item1.StopExecution();
            rootHost = hosts.ElementAt(index).Item1;
            queue.Enqueue(rootHost);
        }
        addThread.Join();

        // Every tracked descendant of a cancelled host must also be cancelled.
        while (queue.Count > 0)
        {
            var currentHost = queue.Dequeue();
            Assert.True(currentHost.IsCancelled);
            if (children.ContainsKey(currentHost))
                children[currentHost].ForEach(x => queue.Enqueue(x));
        }
    }
}