Ejemplo n.º 1
        /// <summary>
        /// Trains a model using SAR.
        /// Validates the arguments, creates a verbose <c>TlcEnvironment</c>, registers a host named "SarHost",
        /// binds the cancellation token to that host and delegates to <c>TrainModel</c>.
        /// </summary>
        /// <param name="settings">The training settings; must not be null</param>
        /// <param name="usageEvents">The usage events to use for training; must not be null</param>
        /// <param name="catalogItems">The catalog items to use for training; must not be null when <c>settings.EnableColdItemPlacement</c> is set</param>
        /// <param name="uniqueUsersCount">The number of users in the user id index file; negative values are rejected</param>
        /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file; negative values are rejected</param>
        /// <param name="cancellationToken">A cancellation token; when signalled, <c>StopExecution</c> is called on the SAR host</param>
        /// <returns>The value returned by <c>TrainModel</c></returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="settings"/> or <paramref name="usageEvents"/> is null, or
        /// <paramref name="catalogItems"/> is null while cold item placement is enabled.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="uniqueUsersCount"/> or <paramref name="uniqueUsageItemsCount"/> is negative.
        /// </exception>
        public IPredictorModel Train(ITrainingSettings settings,
                                     IList <SarUsageEvent> usageEvents,
                                     IList <SarCatalogItem> catalogItems,
                                     int uniqueUsersCount,
                                     int uniqueUsageItemsCount,
                                     CancellationToken cancellationToken)
            // NOTE(review): this excerpt has lost its braces and some statements (the TrainModel call
            // below is cut off mid-argument-list); the comments describe only what is visible.
            if (settings == null)
                throw new ArgumentNullException(nameof(settings));

            if (usageEvents == null)
                throw new ArgumentNullException(nameof(usageEvents));

            // Catalog items are only mandatory when cold item placement is enabled.
            if (settings.EnableColdItemPlacement && catalogItems == null)
                throw new ArgumentNullException(nameof(catalogItems));

            // NOTE(review): the check rejects only negative values (zero passes), while the message
            // says "positive integer" - confirm which of the two is intended.
            if (uniqueUsersCount < 0)
                var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a positive integer");
                throw exception;

            // NOTE(review): same check/message mismatch as for uniqueUsersCount above.
            if (uniqueUsageItemsCount < 0)
                var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a positive integer");
                throw exception;


            using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
                // Clear the feature weights field before this training run starts.
                _detectedFeatureWeights = null;
                    // Subscribe to channel messages emitted by the environment for the duration of the training.
                    environment.AddListener <ChannelMessage>(ChannelMessageListener);
                    IHost environmentHost = environment.Register("SarHost");

                    // bind the cancellation token to SAR cancellation
                    using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
                        _tracer.TraceInformation("Starting training model using SAR");
                        return(TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount,
                    // Unsubscribe the listener added above.
                    // NOTE(review): presumably this sits in a finally block so it also runs after the
                    // return above - the try/finally keywords are not visible in this excerpt.
                    environment.RemoveListener <ChannelMessage>(ChannelMessageListener);
        /// <summary>
        /// For each dataset path, builds <c>InferSchemaCommand.Arguments</c> pointing at the dataset and at an
        /// output file named "dataset-infer-schema-result-NN.txt", constructs an <c>InferSchemaCommand</c>,
        /// and then compares the output file through <c>CheckEquality</c>.
        /// </summary>
        public void InferSchemaCommandTest()
            // NOTE(review): this excerpt has lost its braces and possibly some statements; no call on
            // the constructed command is visible below - confirm against the full source.
            var datasets = new[]
                GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))

            using (var env = new TlcEnvironment())
                // Host and channel are created with a fixed seed (0) and verbose output disabled.
                var h = env.Register("InferSchemaCommandTest", seed: 0, verbose: false);
                using (var ch = h.Start("InferSchemaCommandTest"))
                    for (int i = 0; i < datasets.Length; i++)
                        // The output file name carries the two-digit dataset index.
                        var    outFile  = string.Format("dataset-infer-schema-result-{0:00}.txt", i);
                        string dataPath = GetOutputPath(Path.Combine("..", "Common", "Inference"), outFile);
                        var    args     = new InferSchemaCommand.Arguments()
                            DataFile   = datasets[i],
                            OutputFile = dataPath,

                        // NOTE(review): this uses Env rather than the local env created above;
                        // Env is not declared in this excerpt - confirm which environment is intended.
                        var cmd = new InferSchemaCommand(Env, args);

                        // Check the produced output file, using the same sub-directory and file name as above.
                        CheckEquality(Path.Combine("..", "Common", "Inference"), outFile);
Ejemplo n.º 3
        /// <summary>
        /// The main method to invoke TLC, with some high level configuration options set.
        /// </summary>
        /// <param name="env">The environment used in this run of TLC, for the purpose of returning outputs.</param>
        /// <param name="args">The command line arguments.</param>
        /// <param name="alwaysPrintStacktrace">"Marked" exceptions are assumed to be sufficiently descriptive, so we
        /// do not print stack traces for them to the console, and instead print these only to a log file.
        /// However, throwing unmarked exceptions is considered a bug in TLC (even if due to bad user input),
        /// so for those we always write the detailed dump. If set to true though, this executable will also
        /// print the detailed dump for the marked exceptions as well.</param>
        /// <returns>The result code: it is set to 0 at the end of the normal path and to -1 or -2 in the
        /// exception path. (The return statement itself is not visible in this excerpt.)</returns>
        internal static int MainCore(TlcEnvironment env, string args, bool alwaysPrintStacktrace)
            // NOTE(review): this excerpt has lost its braces and several statements (the try keywords,
            // early returns and some call arguments are missing); comments describe only what is visible.

            // REVIEW: How should extra dlls, tracking, etc be handled? Should the args objects for
            // all commands derive from a common base?
            var mainHost = env.Register("Main");

            using (var telemetryPipe = mainHost.StartPipe <TelemetryMessage>("TelemetryPipe"))
                using (var ch = mainHost.Start("Main"))
                    int result;
                        // Split the command line into the command kind and its settings string;
                        // a parsing failure is reported through telemetry.
                        if (!CmdParser.TryGetFirstToken(args, out string kind, out string settings))
                            telemetryPipe.Send(TelemetryMessage.CreateCommand("ArgumentParsingFailure", args));

                        var cmdDef = new SubComponent <ICommand, SignatureCommand>(kind, settings);

                        // Instantiate the command from the component catalog; an unknown kind is
                        // reported both through telemetry and on the channel.
                        if (!ComponentCatalog.TryCreateInstance(mainHost, out ICommand cmd, cmdDef))
                            // Telemetry: Log
                            telemetryPipe.Send(TelemetryMessage.CreateCommand("UnknownCommand", settings));
                            ch.Error("Unknown command: '{0}'", kind);

                        // Telemetry: Log the command and settings.
                        telemetryPipe.Send(TelemetryMessage.CreateCommand(kind.ToUpperInvariant(), settings));

                        result = 0;
                    catch (Exception ex)
                        // Build a unique dump file name from the UTC timestamp and a fresh GUID.
                        var dumpFileDir = Path.Combine(
                        var dumpFilePath = Path.Combine(dumpFileDir,
                                                        string.Format(CultureInfo.InvariantCulture, "Error_{0:yyyyMMdd_HHmmss}_{1}.log", DateTime.UtcNow, Guid.NewGuid()));
                        bool isDumpSaved = false;
                            // REVIEW: Should specify the encoding.
                            using (var sw = new StreamWriter(new FileStream(dumpFilePath, FileMode.Create, FileAccess.Write)))
                                sw.WriteLine("--- Command line args ---");
                                sw.WriteLine("--- Exception message ---");
                                PrintFullExceptionDetails(sw, ex);

                            // Reached only when writing the dump did not throw.
                            isDumpSaved = true;
                        catch (Exception)
                            // Don't throw an exception if we failed to write to the dump file.

                        // Process exceptions that we understand.
                        // NOTE(review): count is never incremented in the visible code; presumably it
                        // counts the marked exceptions handled in the loop below - confirm.
                        int count = 0;
                        // Walk the chain from the caught exception through its inner exceptions.
                        for (var e = ex; e != null; e = e.InnerException)
                            // Telemetry: Log the exception
                            if (e.IsMarked())
                                ch.Error(e.Sensitivity(), e.Message);
                                PrintExceptionData(ch, e, false);

                        if (count == 0)
                            // Didn't recognize any of the exceptions.
                            ch.Error(MessageSensitivity.None, "***** Unexpected failure. Please go to https://aka.ms/MLNetIssue and register the error details *****");
                            if (isDumpSaved)
                                ch.Error(MessageSensitivity.None, "***** Error log has been saved to '{0}', please register the error at https://aka.ms/MLNetIssue *****",
                        else if (isDumpSaved)
                            ch.Error(MessageSensitivity.None, "Error log has been saved to '{0}'. please register the error at https://aka.ms/MLNetIssue",

                        // The detailed dump goes to the channel when nothing was recognized, or when the caller asked for it.
                        if (count == 0 || alwaysPrintStacktrace)
                            ch.Error(MessageSensitivity.None, "===== Begin detailed dump =====");
                            PrintFullExceptionDetails(ch, ex);
                            ch.Error(MessageSensitivity.None, "====== End detailed dump =====");

                        // Return a negative result code so AEther recognizes this as a failure.
                        // -1 when count is above zero, -2 otherwise.
                        result = count > 0 ? -1 : -2;
        /// <summary>
        /// For each dataset: samples the file, detects the column separator, infers column types and
        /// column purposes, runs <c>DatasetFeatureInference.InferDatasetFeatures</c>, and compares the
        /// output file "dataset-inference-result-NN.txt" through <c>CheckEquality</c>.
        /// </summary>
        public void DatasetInferenceTest()
            // NOTE(review): this excerpt has lost its braces and several statements (the dataset list,
            // the body of the failed type-inference branch and the file write are not visible).
            var datasets = new[]

            using (var env = new TlcEnvironment())
                // Host and channel are created with a fixed seed (0) and verbose output disabled.
                var h = env.Register("InferDatasetFeatures", seed: 0, verbose: false);

                using (var ch = h.Start("InferDatasetFeatures"))
                    for (int i = 0; i < datasets.Length; i++)
                        // Step 1: sample the file and try to split it into columns with the default separators.
                        var sample      = TextFileSample.CreateFromFullFile(h, datasets[i]);
                        var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);
                        if (!splitResult.IsSuccess)
                            throw ch.ExceptDecode("Couldn't detect separator.");

                        // Step 2: infer the column types, using the settings found by the split step.
                        // NOTE(review): this uses Env rather than the local env created above;
                        // Env is not declared in this excerpt - confirm which environment is intended.
                        var typeInfResult = ColumnTypeInference.InferTextFileColumnTypes(Env, sample,
                                                                                         new ColumnTypeInference.Arguments
                            Separator   = splitResult.Separator,
                            AllowSparse = splitResult.AllowSparse,
                            AllowQuote  = splitResult.AllowQuote,
                            ColumnCount = splitResult.ColumnCount

                        if (!typeInfResult.IsSuccess)

                        // Step 3: infer the purpose of each column; hasHeader is set through the out argument.
                        ColumnGroupingInference.GroupingColumn[] columns = null;
                        bool hasHeader = false;
                        columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);
                        // Step 4: build the feature-inference arguments; the id passed in is a fixed GUID.
                        Guid id          = new Guid("60C77F4E-DB62-4351-8311-9B392A12968E");
                        var  commandArgs = new DatasetFeatureInference.Arguments(typeInfResult.Data,
                                                                                     col =>
                                                                                     new DatasetFeatureInference.Column(col.SuggestedName, col.Purpose, col.ItemKind,
                                                                                                                        col.ColumnRangeSelector)).ToArray(), sample.FullFileSize, sample.ApproximateRowCount,
                                                                                 false, id, true);

                        // Step 5: run the inference and compare the output file.
                        string jsonString = DatasetFeatureInference.InferDatasetFeatures(env, commandArgs);
                        var    outFile    = string.Format("dataset-inference-result-{0:00}.txt", i);
                        // NOTE(review): the relative path uses hard-coded backslash separators;
                        // Path.Combine would be portable - confirm whether non-Windows runs matter here.
                        string dataPath   = GetOutputPath(@"..\Common\Inference", outFile);
                        // NOTE(review): presumably jsonString is written to the file here; the write is not visible.
                        using (var sw = new StreamWriter(File.Create(dataPath)))

                        CheckEquality(@"..\Common\Inference", outFile);
Ejemplo n.º 5
        /// <summary>
        /// Trains a model using SAR and reports the detected catalog feature weights by feature name.
        /// Validates the arguments, creates a verbose <c>TlcEnvironment</c>, registers a host named "SarHost",
        /// binds the cancellation token to that host and delegates to <c>TrainModel</c>.
        /// </summary>
        /// <param name="settings">The training settings; must not be null</param>
        /// <param name="usageEvents">The usage events to use for training; must not be null</param>
        /// <param name="catalogItems">The catalog items to use for training; must not be null when <c>settings.EnableColdItemPlacement</c> is set</param>
        /// <param name="featureNames">The names of the catalog items features, in the same order as the feature values in the catalog</param>
        /// <param name="uniqueUsersCount">The number of users in the user id index file; negative values are rejected</param>
        /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file; negative values are rejected</param>
        /// <param name="catalogFeatureWeights">The computed catalog items features weights (if relevant), keyed by feature name;
        /// assigned a new dictionary after training and filled only when the detected weights and
        /// <paramref name="featureNames"/> are both non-null and have the same length</param>
        /// <param name="cancellationToken">A cancellation token; when signalled, <c>StopExecution</c> is called on the SAR host</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="settings"/> or <paramref name="usageEvents"/> is null, or
        /// <paramref name="catalogItems"/> is null while cold item placement is enabled.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="uniqueUsersCount"/> or <paramref name="uniqueUsageItemsCount"/> is negative.
        /// </exception>
        public IPredictorModel Train(ITrainingSettings settings,
                                     IList <SarUsageEvent> usageEvents,
                                     IList <SarCatalogItem> catalogItems,
                                     string[] featureNames,
                                     int uniqueUsersCount,
                                     int uniqueUsageItemsCount,
                                     out IDictionary <string, double> catalogFeatureWeights,
                                     CancellationToken cancellationToken)
            // NOTE(review): this excerpt has lost its braces and some statements (the TrainModel call
            // is cut off and no return statement is visible); comments describe only what is visible.
            if (settings == null)
                throw new ArgumentNullException(nameof(settings));

            if (usageEvents == null)
                throw new ArgumentNullException(nameof(usageEvents));

            // Catalog items are only mandatory when cold item placement is enabled.
            if (settings.EnableColdItemPlacement && catalogItems == null)
                throw new ArgumentNullException(nameof(catalogItems));

            // NOTE(review): the check rejects only negative values (zero passes), while the message
            // says "positive integer" - confirm which of the two is intended.
            if (uniqueUsersCount < 0)
                var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a positive integer");
                throw exception;

            // NOTE(review): same check/message mismatch as for uniqueUsersCount above.
            if (uniqueUsageItemsCount < 0)
                var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a positive integer");
                throw exception;


            using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
                // Clear the feature weights field before this training run starts.
                _detectedFeatureWeights = null;
                    // Subscribe to channel messages emitted by the environment for the duration of the training.
                    environment.AddListener <ChannelMessage>(ChannelMessageListener);
                    IHost environmentHost = environment.Register("SarHost");

                    // bind the cancellation token to SAR cancellation
                    using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
                        _tracer.TraceInformation("Starting training model using SAR");
                        IPredictorModel model = TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount,

                        // The out dictionary is always assigned; it stays empty unless the checks below pass.
                        catalogFeatureWeights = new Dictionary <string, double>();
                        if (_detectedFeatureWeights != null && featureNames != null)
                            // Weights are matched to names by position, so the two lengths must agree.
                            if (_detectedFeatureWeights.Length == featureNames.Length)
                                for (int i = 0; i < featureNames.Length; i++)
                                    catalogFeatureWeights[featureNames[i]] = _detectedFeatureWeights[i];
                                    // NOTE(review): the string below looks like the argument of a call in a
                                    // length-mismatch else branch; the call itself is not visible - confirm.
                                    $"Found a mismatch between number of feature names ({featureNames.Length}) and the number of feature weights ({_detectedFeatureWeights.Length})");

                    // Unsubscribe the listener added above.
                    // NOTE(review): presumably this sits in a finally block - the try/finally keywords
                    // are not visible in this excerpt.
                    environment.RemoveListener <ChannelMessage>(ChannelMessageListener);
Ejemplo n.º 6
        /// <summary>
        /// Repeats 1000 times: registers five child hosts under a "Main" host, sets up a thread body that
        /// registers further descendants under randomly chosen hosts, picks five hosts with a depth value
        /// of at least 3 that are not cancelled, and then walks the recorded children with a queue.
        /// </summary>
        public void TestCancellation()
            // NOTE(review): this excerpt has lost its braces and several statements (no thread start/join,
            // cancellation call, queue seeding or assertion is visible); comments describe only what is visible.
            var env = new TlcEnvironment(seed: 42);

            for (int z = 0; z < 1000; z++)
                var mainHost = env.Register("Main");
                // children maps a host to the list of hosts recorded under it;
                // hosts pairs every registered host with an integer, which starts at 1 here.
                var children = new ConcurrentDictionary <IHost, List <IHost> >();
                var hosts    = new BlockingCollection <Tuple <IHost, int> >();
                hosts.Add(new Tuple <IHost, int>(mainHost.Register("1"), 1));
                hosts.Add(new Tuple <IHost, int>(mainHost.Register("2"), 1));
                hosts.Add(new Tuple <IHost, int>(mainHost.Register("3"), 1));
                hosts.Add(new Tuple <IHost, int>(mainHost.Register("4"), 1));
                hosts.Add(new Tuple <IHost, int>(mainHost.Register("5"), 1));

                int    iterations = 100;
                // NOTE(review): rand is used both inside the thread lambda and in the loop further down;
                // System.Random is not documented as thread-safe - confirm whether the two can overlap.
                Random rand       = new Random();
                var    addThread  = new Thread(
                    () =>
                    for (int i = 0; i < iterations; i++)
                        // NOTE(review): Random.Next(max) excludes max, so Count - 1 never selects the
                        // last element, whereas the selection further down uses hosts.Count - confirm intent.
                        var randHostTuple = hosts.ElementAt(rand.Next(hosts.Count - 1));
                        // The new host is registered under the picked host with the picked value plus one.
                        var newHost       = randHostTuple.Item1.Register((randHostTuple.Item2 + 1).ToString());
                        hosts.Add(new Tuple <IHost, int>(newHost, randHostTuple.Item2 + 1));
                        // NOTE(review): only the creation of the child list is visible; presumably newHost
                        // is also added to the parent's list - confirm against the full source.
                        if (!children.ContainsKey(randHostTuple.Item1))
                            children[randHostTuple.Item1] = new List <IHost>();
                Queue <IHost> queue = new Queue <IHost>();
                for (int i = 0; i < 5; i++)
                    IHost rootHost = null;
                    var   index    = 0;
                        // Keep drawing while the drawn host is already cancelled or its value is below 3.
                        index = rand.Next(hosts.Count);
                    } while (hosts.ElementAt(index).Item1.IsCancelled || hosts.ElementAt(index).Item2 < 3);
                    rootHost = hosts.ElementAt(index).Item1;
                // Walk the queue, enqueueing the recorded children of every host that is dequeued.
                while (queue.Count > 0)
                    var currentHost = queue.Dequeue();

                    if (children.ContainsKey(currentHost))
                        children[currentHost].ForEach(x => queue.Enqueue(x));