/// <summary>
/// Runs <c>InferSchemaCommand</c> over each input data file, directing its output to a numbered
/// result file, and then calls <c>CheckEquality</c> on that result file.
/// </summary>
public void InferSchemaCommandTest()
        {
            var inputFiles = new[]
            {
                GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))
            };

            using (var env = new ConsoleEnvironment())
            {
                // The host and channel are not referenced below, but they are still registered/started
                // for the duration of the test, exactly as before.
                var host = env.Register("InferSchemaCommandTest", seed: 0, verbose: false);
                using (var channel = host.Start("InferSchemaCommandTest"))
                {
                    // Directory (relative to the output root) shared by the result file and the equality check.
                    var inferenceDir = Path.Combine("..", "Common", "Inference");

                    int index = 0;
                    foreach (var inputFile in inputFiles)
                    {
                        var resultName = $"dataset-infer-schema-result-{index:00}.txt";

                        // NOTE(review): this passes the fixture-level Env rather than the local env — confirm intended.
                        var command = new InferSchemaCommand(Env, new InferSchemaCommand.Arguments()
                        {
                            DataFile   = inputFile,
                            OutputFile = GetOutputPath(inferenceDir, resultName),
                        });
                        command.Run();

                        CheckEquality(inferenceDir, resultName);
                        index++;
                    }
                }
            }
            Done();
        }
示例#2
0
        /// <summary>
        /// The main method to invoke TLC, with some high level configuration options set.
        /// </summary>
        /// <param name="env">The environment used in this run of TLC, for the purpose of returning outputs.</param>
        /// <param name="args">The command line arguments.</param>
        /// <param name="alwaysPrintStacktrace">"Marked" exceptions are assumed to be sufficiently descriptive, so by
        /// default their full details are not printed to the console and are written only to the error log file.
        /// Throwing unmarked exceptions is considered a bug in TLC (even if due to bad user input), so when no
        /// exception in the chain is marked the full details are always printed. If set to true, the full details
        /// are printed even when marked exceptions are present.</param>
        /// <returns>0 when the command ran to completion; -1 when the arguments could not be parsed, the command
        /// was not recognized, or the failure included at least one marked exception; -2 when the failure
        /// contained no marked exception.</returns>
        internal static int MainCore(ConsoleEnvironment env, string args, bool alwaysPrintStacktrace)
        {
            // REVIEW: How should extra dlls, tracking, etc be handled? Should the args objects for
            // all commands derive from a common base?
            var mainHost = env.Register("Main");

            using (var telemetry = mainHost.StartPipe<TelemetryMessage>("TelemetryPipe"))
            using (var channel = mainHost.Start("Main"))
            {
                int exitCode;
                try
                {
                    // Split the command line into the command kind and the remaining settings.
                    string kind;
                    string settings;
                    if (!CmdParser.TryGetFirstToken(args, out kind, out settings))
                    {
                        telemetry.Send(TelemetryMessage.CreateCommand("ArgumentParsingFailure", args));
                        Usage();
                        return -1;
                    }

                    // Resolve the command implementation through the component catalog.
                    ICommand command;
                    if (!ComponentCatalog.TryCreateInstance<ICommand, SignatureCommand>(mainHost, out command, kind, settings))
                    {
                        // Telemetry: Log
                        telemetry.Send(TelemetryMessage.CreateCommand("UnknownCommand", settings));
                        channel.Error("Unknown command: '{0}'", kind);
                        Usage();
                        return -1;
                    }

                    // Telemetry: Log the command and settings.
                    telemetry.Send(TelemetryMessage.CreateCommand(kind.ToUpperInvariant(), settings));
                    command.Run();

                    exitCode = 0;
                }
                catch (Exception failure)
                {
                    // Try to persist the full failure details to a uniquely named log under the temp directory.
                    var logDirectory = Path.Combine(Path.GetTempPath(), "TLC");
                    var logFileName = string.Format(
                        CultureInfo.InvariantCulture,
                        "Error_{0:yyyyMMdd_HHmmss}_{1}.log",
                        DateTime.UtcNow,
                        Guid.NewGuid());
                    var logPath = Path.Combine(logDirectory, logFileName);

                    bool logWritten = false;
                    try
                    {
                        Directory.CreateDirectory(logDirectory);
                        // REVIEW: Should specify the encoding.
                        using (var writer = new StreamWriter(new FileStream(logPath, FileMode.Create, FileAccess.Write)))
                        {
                            writer.WriteLine("--- Command line args ---");
                            writer.WriteLine(args);
                            writer.WriteLine("--- Exception message ---");
                            PrintFullExceptionDetails(writer, failure);
                        }

                        logWritten = true;
                    }
                    catch (Exception)
                    {
                        // Don't throw an exception if we failed to write to the dump file.
                    }

                    // Process exceptions that we understand: walk the whole inner-exception chain,
                    // reporting every link to telemetry and surfacing the marked ones on the channel.
                    int markedCount = 0;
                    Exception current = failure;
                    while (current != null)
                    {
                        // Telemetry: Log the exception
                        telemetry.Send(TelemetryMessage.CreateException(current));
                        if (current.IsMarked())
                        {
                            channel.Error(current.Sensitivity(), current.Message);
                            PrintExceptionData(channel, current, false);
                            markedCount++;
                        }

                        current = current.InnerException;
                    }

                    bool anyMarked = markedCount > 0;
                    if (!anyMarked)
                    {
                        // Didn't recognize any of the exceptions.
                        channel.Error(MessageSensitivity.None, "***** Unexpected failure. Please refer to https://aka.ms/MLNetIssue to file an issue with details *****");
                        if (logWritten)
                        {
                            channel.Error(MessageSensitivity.None, "***** Error log has been saved to '{0}', please refer to https://aka.ms/MLNetIssue to file an issue with details *****",
                                          logPath);
                        }
                    }
                    else if (logWritten)
                    {
                        channel.Error(MessageSensitivity.None, "Error log has been saved to '{0}'. Please refer to https://aka.ms/MLNetIssue if you need assistance.",
                                      logPath);
                    }

                    // Full details go to the console when nothing was marked, or when the caller asked for them.
                    if (alwaysPrintStacktrace || !anyMarked)
                    {
                        channel.Error(MessageSensitivity.None, "===== Begin detailed dump =====");
                        PrintFullExceptionDetails(channel, failure);
                        channel.Error(MessageSensitivity.None, "====== End detailed dump =====");
                    }

                    // Return a negative result code so AEther recognizes this as a failure.
                    if (anyMarked)
                    {
                        exitCode = -1;
                    }
                    else
                    {
                        exitCode = -2;
                    }
                }

                // Only reached on the non-early-return paths, as before.
                telemetry.Done();
                return exitCode;
            }
        }
        /// <summary>
        /// For each input data file: detects the column separator, infers column types and column purposes,
        /// runs <c>DatasetFeatureInference.InferDatasetFeatures</c>, writes the returned JSON string to a
        /// numbered result file and calls <c>CheckEquality</c> on that file.
        /// </summary>
        /// <remarks>
        /// All relative paths are built with <see cref="Path.Combine(string, string, string)"/> rather than
        /// hard-coded backslashes, so the test also resolves its files on platforms where the directory
        /// separator is not '\'.
        /// </remarks>
        public void DatasetInferenceTest()
        {
            var datasets = new[]
            {
                GetDataPath(Path.Combine("..", "UCI", "adult.train")),
                GetDataPath(Path.Combine("..", "UCI", "adult.test")),
                GetDataPath(Path.Combine("..", "UnitTest", "breast-cancer.txt")),
            };

            // Directory shared by the result file and the equality check.
            string inferenceDir = Path.Combine("..", "Common", "Inference");

            using (var env = new ConsoleEnvironment())
            {
                var h = env.Register("InferDatasetFeatures", seed: 0, verbose: false);

                using (var ch = h.Start("InferDatasetFeatures"))
                {
                    for (int i = 0; i < datasets.Length; i++)
                    {
                        var sample      = TextFileSample.CreateFromFullFile(h, datasets[i]);
                        var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);
                        if (!splitResult.IsSuccess)
                        {
                            throw ch.ExceptDecode("Couldn't detect separator.");
                        }

                        // NOTE(review): this passes the fixture-level Env rather than the local env — confirm intended.
                        var typeInfResult = ColumnTypeInference.InferTextFileColumnTypes(
                            Env,
                            sample,
                            new ColumnTypeInference.Arguments
                            {
                                Separator   = splitResult.Separator,
                                AllowSparse = splitResult.AllowSparse,
                                AllowQuote  = splitResult.AllowQuote,
                                ColumnCount = splitResult.ColumnCount
                            });

                        if (!typeInfResult.IsSuccess)
                        {
                            // NOTE(review): this leaves the test without reaching Done() or comparing any output;
                            // kept as-is to preserve existing behavior — confirm a hard failure is not wanted here.
                            return;
                        }

                        // The header flag is produced by the call but not needed by this test.
                        ColumnGroupingInference.GroupingColumn[] columns =
                            InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out bool _);

                        var featureColumns = columns
                            .Select(col => new DatasetFeatureInference.Column(
                                col.SuggestedName, col.Purpose, col.ItemKind, col.ColumnRangeSelector))
                            .ToArray();

                        // Fixed id so that repeated runs feed identical arguments to the inference call.
                        Guid id          = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");
                        var  commandArgs = new DatasetFeatureInference.Arguments(
                            typeInfResult.Data,
                            featureColumns,
                            sample.FullFileSize,
                            sample.ApproximateRowCount,
                            false,
                            id,
                            true);

                        string jsonString = DatasetFeatureInference.InferDatasetFeatures(env, commandArgs);
                        var    outFile    = string.Format("dataset-inference-result-{0:00}.txt", i);
                        string dataPath   = GetOutputPath(inferenceDir, outFile);
                        using (var sw = new StreamWriter(File.Create(dataPath)))
                        {
                            sw.WriteLine(jsonString);
                        }

                        CheckEquality(inferenceDir, outFile);
                    }
                }
            }
            Done();
        }
示例#4
0
        /// <summary>
        /// Stress test for host cancellation. In each of 1000 rounds a background thread registers 100 new
        /// hosts under randomly chosen existing hosts while the test thread calls <c>StopExecution</c> on five
        /// randomly chosen, not-yet-cancelled hosts of depth 3 or more. After the background thread finishes,
        /// every stopped host and every host recorded beneath it must report <c>IsCancelled</c>.
        /// </summary>
        public void TestCancellation()
        {
            // The environment is disposable (other tests in this file wrap it in 'using'); release it here too.
            using (var env = new ConsoleEnvironment(seed: 42))
            {
                for (int z = 0; z < 1000; z++)
                {
                    var mainHost = env.Register("Main");

                    // Parent host -> hosts registered directly under it. Written only by the adder thread,
                    // read by the test thread only after Join().
                    var children = new ConcurrentDictionary<IHost, List<IHost>>();

                    // Every host created so far, paired with its depth below mainHost.
                    var hosts = new BlockingCollection<Tuple<IHost, int>>();
                    foreach (var name in new[] { "1", "2", "3", "4", "5" })
                    {
                        hosts.Add(new Tuple<IHost, int>(mainHost.Register(name), 1));
                    }

                    const int iterations = 100;

                    // System.Random is not thread-safe, so each thread gets its own instance instead of
                    // sharing one; a corrupted shared instance could make the selection loop below spin forever.
                    var addRand  = new Random();
                    var pickRand = new Random();

                    var addThread = new Thread(
                        () =>
                        {
                            for (int i = 0; i < iterations; i++)
                            {
                                // Upper bound is exclusive, so the most recently added entry is never picked as parent.
                                var parent = hosts.ElementAt(addRand.Next(hosts.Count - 1));
                                int depth  = parent.Item2 + 1;
                                var child  = parent.Item1.Register(depth.ToString());
                                hosts.Add(new Tuple<IHost, int>(child, depth));

                                // GetOrAdd creates the list on first use AND lets us record this child;
                                // previously the first child of every parent was silently dropped.
                                children.GetOrAdd(parent.Item1, _ => new List<IHost>()).Add(child);
                            }
                        });
                    addThread.Start();

                    var queue = new Queue<IHost>();
                    for (int i = 0; i < 5; i++)
                    {
                        // Keep sampling until we hit a live host at depth >= 3; such hosts only appear
                        // once the adder thread has made some progress.
                        Tuple<IHost, int> candidate;
                        do
                        {
                            candidate = hosts.ElementAt(pickRand.Next(hosts.Count));
                        } while (candidate.Item1.IsCancelled || candidate.Item2 < 3);

                        candidate.Item1.StopExecution();
                        queue.Enqueue(candidate.Item1);
                    }

                    addThread.Join();

                    // Breadth-first walk from every stopped host through all recorded descendants.
                    while (queue.Count > 0)
                    {
                        var currentHost = queue.Dequeue();
                        Assert.True(currentHost.IsCancelled);

                        if (children.TryGetValue(currentHost, out List<IHost> descendants))
                        {
                            foreach (var descendant in descendants)
                            {
                                queue.Enqueue(descendant);
                            }
                        }
                    }
                }
            }
        }