/// <summary>
/// Trains a predictor on <paramref name="data"/>, optionally with a validation set and
/// an input predictor for incremental training, then calibrates the result if a
/// calibrator factory was supplied.
/// </summary>
private static IPredictor TrainCore(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, RoleMappedData validData, IComponentFactory<ICalibratorTrainer> calibrator, int maxCalibrationExamples, bool? cacheData, IPredictor inputPredictor = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckValueOrNull(validData);
    ch.CheckValueOrNull(inputPredictor);

    // Caching is applied to both the training and (when present) validation data.
    AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
    ch.Trace("Training");
    if (validData != null)
        AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

    // An input model only makes sense for trainers that support incremental training.
    if (inputPredictor != null && !trainer.Info.SupportsIncrementalTraining)
    {
        ch.Warning("Ignoring " + nameof(TrainCommand.Arguments.InputModelFile) +
            ": Trainer does not support incremental training.");
        inputPredictor = null;
    }

    ch.Assert(validData == null || trainer.Info.SupportsValidation);
    var trainedModel = trainer.Train(new TrainContext(data, validData, inputPredictor));
    var calibratorTrainer = calibrator?.CreateComponent(env);
    return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibratorTrainer, maxCalibrationExamples, trainer, trainedModel, data);
}
/// <summary>
/// Creates the data loader for a stopwords file. If no loader factory is given, the
/// loader is inferred from the file extension (.idv binary, .tdv transpose, otherwise
/// a single-column text loader whose column name is written back to
/// <paramref name="stopwordsCol"/>).
/// </summary>
private static IDataLoader LoadStopwords(IHostEnvironment env, IChannel ch, string dataFile, IComponentFactory<IMultiStreamSource, IDataLoader> loader, ref string stopwordsCol)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));

    MultiFileSource fileSource = new MultiFileSource(dataFile);
    IDataLoader dataLoader;

    // First column using the file.
    if (loader == null)
    {
        // Determine the default loader from the extension.
        var ext = Path.GetExtension(dataFile);
        bool isBinary = string.Equals(ext, ".idv", StringComparison.OrdinalIgnoreCase);
        bool isTranspose = string.Equals(ext, ".tdv", StringComparison.OrdinalIgnoreCase);
        if (isBinary || isTranspose)
        {
            ch.Assert(isBinary != isTranspose);
            // Binary/transpose files carry multiple columns, so the caller must name one.
            ch.CheckUserArg(!string.IsNullOrWhiteSpace(stopwordsCol), nameof(Arguments.StopwordsColumn), "stopwordsColumn should be specified");
            if (isBinary)
            {
                dataLoader = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource);
            }
            else
            {
                ch.Assert(isTranspose);
                dataLoader = new TransposeLoader(env, new TransposeLoader.Arguments(), fileSource);
            }
        }
        else
        {
            if (!string.IsNullOrWhiteSpace(stopwordsCol))
            {
                // BUG FIX: the original format string used "{0}" for both placeholders with a
                // single argument, so the column *value* was printed where the argument *name*
                // belongs. Supply the argument name and use distinct placeholder indexes.
                ch.Warning("{0} should not be specified when default loader is TextLoader. Ignoring stopwordsColumn={1}",
                    nameof(Arguments.StopwordsColumn), stopwordsCol);
            }
            dataLoader = TextLoader.Create(
                env,
                new TextLoader.Arguments()
                {
                    Separator = "tab",
                    Column = new[] { new TextLoader.Column("Stopwords", DataKind.TX, 0) }
                },
                fileSource);
            // The default text loader always exposes the stopwords under this name.
            stopwordsCol = "Stopwords";
        }
        ch.AssertNonEmpty(stopwordsCol);
    }
    else
    {
        dataLoader = loader.CreateComponent(env, fileSource);
    }
    return dataLoader;
}
/// <summary>
/// Public entry point for training: forwards all arguments to
/// <see cref="TrainCore"/>.
/// </summary>
public static IPredictor Train(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, RoleMappedData validData, IComponentFactory<ICalibratorTrainer> calibrator, int maxCalibrationExamples, bool? cacheData, IPredictor inputPredictor = null)
{
    // BUG FIX: TrainCore takes the calibrator *factory* (it instantiates the
    // ICalibratorTrainer itself via CreateComponent), so forward the factory
    // unchanged. The original pre-created the ICalibratorTrainer here and passed
    // it where an IComponentFactory<ICalibratorTrainer> is expected, which does
    // not type-check against TrainCore's signature.
    return TrainCore(env, ch, data, trainer, validData, calibrator, maxCalibrationExamples, cacheData, inputPredictor);
}
/// <summary>
/// Runs the healthcheck for every configured component, or for a single component
/// when <paramref name="id"/> is supplied, draining a shared work queue from up to
/// the configured number of parallel workers.
/// </summary>
/// <param name="id">Optional component item ID; only the matching item is run when set.</param>
public void RunHealthcheck(string id = null)
{
    // Healthchecks run in the "en" language against the master database with
    // security checks disabled.
    using (new LanguageSwitcher(Language.Parse("en")))
    {
        var workerCount = Settings.GetIntSetting(maxNumberOfThreadsSettingsKey, 1);
        var pending = new ConcurrentQueue<BaseComponent>();
        using (new DatabaseSwitcher(Factory.GetDatabase("master")))
        using (new SecurityDisabler())
        {
            var settingsItem = Sitecore.Context.Database.GetItem(new ID(Constants.SettingsItemId));

            // Log retention: fall back to the default when the "Days" field is
            // missing or unparsable.
            int retentionDays;
            if (!int.TryParse(settingsItem["Days"], out retentionDays))
                retentionDays = DefaultNumberOfDaysToKeepLogs;

            // Collect the components to check (optionally filtered to one item).
            var componentsFolder = Sitecore.Context.Database.GetItem(new ID(Constants.ComponentsRootFolderId));
            foreach (Item item in componentsFolder.Axes.GetDescendants())
            {
                bool matches = string.IsNullOrEmpty(id)
                    || item.ID.ToString().Equals(id, System.StringComparison.OrdinalIgnoreCase);
                if (!matches)
                    continue;

                var component = componentFactory.CreateComponent(item);
                if (component != null)
                    pending.Enqueue(component);
            }

            // Each worker pulls from the shared queue until it is empty.
            var workers = new List<Action>();
            for (int i = 0; i < workerCount; i++)
            {
                workers.Add(() =>
                {
                    BaseComponent component;
                    while (pending.TryDequeue(out component))
                    {
                        component.RunHealthcheck();
                        component.SaveHealthcheckResult(retentionDays);
                    }
                });
            }
            Parallel.Invoke(workers.ToArray());
        }
    }
}
/// <summary>
/// Given a predictor, an optional mapper factory, and optional scorer factory settings,
/// produces a compatible ISchemaBindableMapper. Resolution order:
/// (1) the supplied <paramref name="mapperFactory"/>;
/// (2) a bindable mapper registered under the scorer's load name, via
/// <paramref name="scorerFactorySettings"/>;
/// (3) the predictor itself, if it implements <see cref="ISchemaBindableMapper"/>;
/// (4) a 'matching' standard wrapper.
/// </summary>
public static ISchemaBindableMapper GetSchemaBindableMapper(
    IHostEnvironment env,
    IPredictor predictor,
    IComponentFactory<IPredictor, ISchemaBindableMapper> mapperFactory = null,
    ICommandLineComponentFactory scorerFactorySettings = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(predictor, nameof(predictor));
    env.CheckValueOrNull(mapperFactory);
    env.CheckValueOrNull(scorerFactorySettings);

    // Highest priority: an explicitly supplied factory.
    if (mapperFactory != null)
        return mapperFactory.CreateComponent(env, predictor);

    // Next: a bindable mapper registered under the scorer's load name.
    if (scorerFactorySettings != null &&
        TryCreateBindableFromScorer(env, predictor, scorerFactorySettings, out var fromScorer))
        return fromScorer;

    // The easy case: the predictor implements the interface directly.
    if (predictor is ISchemaBindableMapper selfBindable)
        return selfBindable;

    // Fall back to one of the standard wrappers.
    return predictor is IValueMapperDist
        ? new SchemaBindableBinaryPredictorWrapper(predictor)
        : (ISchemaBindableMapper)new SchemaBindablePredictorWrapper(predictor);
}
/// <summary>
/// Parses a single entity into an EntityData object, creating one component for each
/// recognized child ("Sprite", "Player", "Position", "Velocity", "Acceleration") of
/// the entity's "Components" descendants.
/// </summary>
/// <param name="xEntity">The XML element describing the entity.</param>
/// <returns>The populated <see cref="EntityData"/>.</returns>
private EntityData ParseEntity(XElement xEntity)
{
    if (xEntity == null)
    {
        // BUG FIX: the original passed xEntity.ToString() to ArgumentNullException,
        // which dereferences xEntity and throws NullReferenceException exactly when
        // this guard fires. Use nameof instead.
        // NOTE(review): control still falls through to the dereference of xEntity
        // below; presumably the logger throws/aborts — confirm.
        EntityIoLogger.WriteNullArgumentIoException(new ArgumentNullException(nameof(xEntity)), IoType.Component, _entityNumber);
    }
    EntityIoLogger.WriteIoInformation(xEntity, IoType.Entity, _entityNumber);

    EntityData entityData = new EntityData();
    var xComponents = xEntity.Descendants("Components");

    var xSpriteComponent = xComponents.Descendants("Sprite");
    var xPlayerComponent = xComponents.Descendants("Player");
    var xPositionComponent = xComponents.Descendants("Position");
    var xVelocitiyComponent = xComponents.Descendants("Velocity");
    var xAccelerationComponent = xComponents.Descendants("Acceleration");

    // Only the first occurrence of each component element is used.
    if (xSpriteComponent.Any())
    {
        entityData.Components.Add(
            _componentFactory.CreateComponent<SpriteComponent>(xSpriteComponent.FirstOrDefault()));
    }
    if (xPlayerComponent.Any())
    {
        entityData.Components.Add(
            _componentFactory.CreateComponent<PlayerComponent>(xPlayerComponent.FirstOrDefault()));
    }
    if (xPositionComponent.Any())
    {
        entityData.Components.Add(
            _componentFactory.CreateComponent<PositionComponent>(xPositionComponent.FirstOrDefault()));
    }
    if (xVelocitiyComponent.Any())
    {
        entityData.Components.Add(
            _componentFactory.CreateComponent<VelocityComponent>(xVelocitiyComponent.FirstOrDefault()));
    }
    if (xAccelerationComponent.Any())
    {
        entityData.Components.Add(
            _componentFactory.CreateComponent<AccelerationComponent>(xAccelerationComponent.FirstOrDefault()));
    }

    return entityData;
}
// Core of the Train command. Constructs the trainer, builds the data pipeline and
// resolves the role columns (label/feature/group/weight/name plus custom columns),
// inserts a normalizer into the pipeline when needed (mutates `view` by ref), then
// optionally builds a validation set by replaying all training transforms over the
// validation file, trains (with optional calibration and, when supported, an input
// predictor for incremental training), and saves the model to the output file.
// NOTE(review): when Args.ContinueTrain is set but no predictor loads, only a
// warning is emitted and training proceeds from scratch.
// NOTE(review): the validation file is silently reduced to a warning when the
// trainer reports !Info.SupportsValidation.
private void RunCore(IChannel ch, string cmd) { Host.AssertValue(ch); Host.AssertNonEmpty(cmd); ch.Trace("Constructing trainer"); ITrainer trainer = _trainer.CreateComponent(Host); IPredictor inputPredictor = null; if (Args.ContinueTrain && !TrainUtils.TryLoadPredictor(ch, Host, Args.InputModelFile, out inputPredictor)) { ch.Warning("No input model file specified or model file did not contain a predictor. The model state cannot be initialized."); } ch.Trace("Constructing data pipeline"); IDataView view = CreateLoader(); ISchema schema = view.Schema; var label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.LabelColumn), _labelColumn, DefaultColumnNames.Label); var feature = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.FeatureColumn), _featureColumn, DefaultColumnNames.Features); var group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.GroupColumn), _groupColumn, DefaultColumnNames.GroupId); var weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.WeightColumn), _weightColumn, DefaultColumnNames.Weight); var name = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.NameColumn), _nameColumn, DefaultColumnNames.Name); TrainUtils.AddNormalizerIfNeeded(Host, ch, trainer, ref view, feature, Args.NormalizeFeatures); ch.Trace("Binding columns"); var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, Args.CustomColumn); var data = new RoleMappedData(view, label, feature, group, weight, name, customCols); // REVIEW: Unify the code that creates validation examples in Train, TrainTest and CV commands. 
RoleMappedData validData = null; if (!string.IsNullOrWhiteSpace(Args.ValidationFile)) { if (!trainer.Info.SupportsValidation) { ch.Warning("Ignoring validationFile: Trainer does not accept validation dataset."); } else { ch.Trace("Constructing the validation pipeline"); IDataView validPipe = CreateRawLoader(dataFile: Args.ValidationFile); validPipe = ApplyTransformUtils.ApplyAllTransformsToData(Host, view, validPipe); validData = new RoleMappedData(validPipe, data.Schema.GetColumnRoleNames()); } } var predictor = TrainUtils.Train(Host, ch, data, trainer, validData, Args.Calibrator, Args.MaxCalibrationExamples, Args.CacheData, inputPredictor); using (var file = Host.CreateOutputFile(Args.OutputModelFile)) TrainUtils.SaveModel(Host, ch, file, predictor, data, cmd); }
/// <summary>
/// Builds a component from a template: instantiates it through the factory, applies
/// the default style, then applies each templated property setter in order.
/// </summary>
/// <param name="componentTemplate">The template describing the component to build.</param>
/// <returns>The styled, property-initialized component.</returns>
private IComponent CreateComponent(ComponentTemplate componentTemplate)
{
    var component = componentFactory.CreateComponent(componentTemplate.ComponentType);

    styleSetter.ApplyStyle(component);
    foreach (var propertySetter in componentTemplate.PropertySetters)
        styleSetter.ApplyProperty(component, propertySetter);

    return component;
}
/// <summary>
/// Loads a single term/value data file and returns its serialized bytes. When no
/// loader factory is supplied, the loader is inferred from the extension: .idv is
/// binary, .tdv is transpose; any other extension is a user error.
/// </summary>
private static byte[] GetBytesOne(IHost host, string dataFile, IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory, string termColumn, string valueColumn)
{
    Contracts.AssertValue(host);
    host.Assert(!string.IsNullOrWhiteSpace(dataFile));
    host.AssertNonEmpty(termColumn);
    host.AssertNonEmpty(valueColumn);

    IMultiStreamSource fileSource = new MultiFileSource(dataFile);
    IDataLoader loader;
    if (loaderFactory != null)
    {
        loader = loaderFactory.CreateComponent(host, fileSource);
    }
    else
    {
        // REVIEW: Should there be defaults for loading from text?
        var extension = Path.GetExtension(dataFile);
        bool isBinary = string.Equals(extension, ".idv", StringComparison.OrdinalIgnoreCase);
        bool isTranspose = string.Equals(extension, ".tdv", StringComparison.OrdinalIgnoreCase);
        if (!isBinary && !isTranspose)
        {
            throw host.ExceptUserArg(nameof(Arguments.Loader), "must specify the loader");
        }
        host.Assert(isBinary != isTranspose); // One or the other must be true.
        loader = isBinary
            ? (IDataLoader)new BinaryLoader(host, new BinaryLoader.Arguments(), fileSource)
            : new TransposeLoader(host, new TransposeLoader.Arguments(), fileSource);
    }
    return GetBytesFromDataView(host, loader, termColumn, valueColumn);
}
// Runs one fold of cross-validation: carves the train split (complemented range
// filter) and test split (same range, not complemented) out of the input view,
// optionally replays the training transforms over a validation view, trains,
// scores the test split, saves the per-fold model when requested, evaluates, and
// returns the fold's metrics (plus per-instance metrics when _savePerInstance).
// NOTE(review): OpaqueDataView wrapping presumably prevents transform-chain
// shortcuts across the filter boundary — confirm.
// NOTE(review): validation is silently reduced to a warning when the trainer
// reports !Info.SupportsValidation.
private FoldResult RunFold(int fold) { var host = GetHost(); host.Assert(0 <= fold && fold <= _numFolds); // REVIEW: Make channels buffered in multi-threaded environments. using (var ch = host.Start($"Fold {fold}")) { ch.Trace("Constructing trainer"); ITrainer trainer = _trainer.CreateComponent(host); // Train pipe. var trainFilter = new RangeFilter.Arguments(); trainFilter.Column = _splitColumn; trainFilter.Min = (Double)fold / _numFolds; trainFilter.Max = (Double)(fold + 1) / _numFolds; trainFilter.Complement = true; IDataView trainPipe = new RangeFilter(host, trainFilter, _inputDataView); trainPipe = new OpaqueDataView(trainPipe); var trainData = _createExamples(host, ch, trainPipe, trainer); // Test pipe. var testFilter = new RangeFilter.Arguments(); testFilter.Column = trainFilter.Column; testFilter.Min = trainFilter.Min; testFilter.Max = trainFilter.Max; ch.Assert(!testFilter.Complement); IDataView testPipe = new RangeFilter(host, testFilter, _inputDataView); testPipe = new OpaqueDataView(testPipe); var testData = _applyTransformsToTestData(host, ch, testPipe, trainData, trainPipe); // Validation pipe and examples. RoleMappedData validData = null; if (_getValidationDataView != null) { ch.Assert(_applyTransformsToValidationData != null); if (!trainer.Info.SupportsValidation) { ch.Warning("Trainer does not accept validation dataset."); } else { ch.Trace("Constructing the validation pipeline"); IDataView validLoader = _getValidationDataView(); var validPipe = ApplyTransformUtils.ApplyAllTransformsToData(host, _inputDataView, validLoader); validPipe = new OpaqueDataView(validPipe); validData = _applyTransformsToValidationData(host, ch, validPipe, trainData, trainPipe); } } // Train. var predictor = TrainUtils.Train(host, ch, trainData, trainer, validData, _calibrator, _maxCalibrationExamples, _cacheData, _inputPredictor); // Score. 
ch.Trace("Scoring and evaluating"); ch.Assert(_scorer == null || _scorer is ICommandLineComponentFactory, "CrossValidationCommand should only be used from the command line."); var bindable = ScoreUtils.GetSchemaBindableMapper(host, predictor, scorerFactorySettings: _scorer as ICommandLineComponentFactory); ch.AssertValue(bindable); var mapper = bindable.Bind(host, testData.Schema); var scorerComp = _scorer ?? ScoreUtils.GetScorerComponent(mapper); IDataScorerTransform scorePipe = scorerComp.CreateComponent(host, testData.Data, mapper, trainData.Schema); // Save per-fold model. string modelFileName = ConstructPerFoldName(_outputModelFile, fold); if (modelFileName != null && _loader != null) { using (var file = host.CreateOutputFile(modelFileName)) { var rmd = new RoleMappedData( CompositeDataLoader.ApplyTransform(host, _loader, null, null, (e, newSource) => ApplyTransformUtils.ApplyAllTransformsToData(e, trainData.Data, newSource)), trainData.Schema.GetColumnRoleNames()); TrainUtils.SaveModel(host, ch, file, predictor, rmd, _cmd); } } // Evaluate. var eval = _evaluator?.CreateComponent(host) ?? EvaluateUtils.GetEvaluator(host, scorePipe.Schema); // Note that this doesn't require the provided columns to exist (because of the "opt" parameter). // We don't normally expect the scorer to drop columns, but if it does, we should not require // all the columns in the test pipeline to still be present. var dataEval = new RoleMappedData(scorePipe, testData.Schema.GetColumnRoleNames(), opt: true); var dict = eval.Evaluate(dataEval); RoleMappedData perInstance = null; if (_savePerInstance) { var perInst = eval.GetPerInstanceMetrics(dataEval); perInstance = new RoleMappedData(perInst, dataEval.Schema.GetColumnRoleNames(), opt: true); } ch.Done(); return(new FoldResult(dict, dataEval.Schema.Schema, perInstance, trainData.Schema)); } }
// Core of the TrainTest command. Same pipeline construction as the Train command
// (trainer, role-column binding, optional normalizer inserted by ref, optional
// validation set built by replaying the training transforms), but additionally
// builds a trainer-visible *test* set when Args.TestFile is supplied and the
// trainer supports it. Per the inline comment: a validation set may indirectly
// influence the model, while the test set is only used for score reporting.
// NOTE(review): this calls a TrainUtils.Train overload that also accepts
// testDataUsedInTrainer (not the 9-argument overload visible elsewhere) — confirm
// the overload exists.
private void RunCore(IChannel ch, string cmd) { Host.AssertValue(ch); Host.AssertNonEmpty(cmd); ch.Trace("Constructing trainer"); ITrainer trainer = _trainer.CreateComponent(Host); IPredictor inputPredictor = null; if (Args.ContinueTrain && !TrainUtils.TryLoadPredictor(ch, Host, Args.InputModelFile, out inputPredictor)) { ch.Warning("No input model file specified or model file did not contain a predictor. The model state cannot be initialized."); } ch.Trace("Constructing data pipeline"); IDataView view = CreateLoader(); ISchema schema = view.Schema; var label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.LabelColumn), _labelColumn, DefaultColumnNames.Label); var feature = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.FeatureColumn), _featureColumn, DefaultColumnNames.Features); var group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.GroupColumn), _groupColumn, DefaultColumnNames.GroupId); var weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.WeightColumn), _weightColumn, DefaultColumnNames.Weight); var name = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.NameColumn), _nameColumn, DefaultColumnNames.Name); TrainUtils.AddNormalizerIfNeeded(Host, ch, trainer, ref view, feature, Args.NormalizeFeatures); ch.Trace("Binding columns"); var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, Args.CustomColumn); var data = new RoleMappedData(view, label, feature, group, weight, name, customCols); // REVIEW: Unify the code that creates validation examples in Train, TrainTest and CV commands. 
RoleMappedData validData = null; if (!string.IsNullOrWhiteSpace(Args.ValidationFile)) { if (!trainer.Info.SupportsValidation) { ch.Warning("Ignoring validationFile: Trainer does not accept validation dataset."); } else { ch.Trace("Constructing the validation pipeline"); IDataView validPipe = CreateRawLoader(dataFile: Args.ValidationFile); validPipe = ApplyTransformUtils.ApplyAllTransformsToData(Host, view, validPipe); validData = new RoleMappedData(validPipe, data.Schema.GetColumnRoleNames()); } } // In addition to the training set, some trainers can accept two extra data sets, validation set and test set, // in training phase. The major difference between validation set and test set is that training process may // indirectly use validation set to improve the model but the learned model should totally independent of test set. // Similar to validation set, the trainer can report the scores computed using test set. RoleMappedData testDataUsedInTrainer = null; if (!string.IsNullOrWhiteSpace(Args.TestFile)) { // In contrast to the if-else block for validation above, we do not throw a warning if test file is provided // because this is TrainTest command. if (trainer.Info.SupportsTest) { ch.Trace("Constructing the test pipeline"); IDataView testPipeUsedInTrainer = CreateRawLoader(dataFile: Args.TestFile); testPipeUsedInTrainer = ApplyTransformUtils.ApplyAllTransformsToData(Host, view, testPipeUsedInTrainer); testDataUsedInTrainer = new RoleMappedData(testPipeUsedInTrainer, data.Schema.GetColumnRoleNames()); } } var predictor = TrainUtils.Train(Host, ch, data, trainer, validData, Args.Calibrator, Args.MaxCalibrationExamples, Args.CacheData, inputPredictor, testDataUsedInTrainer); using (var file = Host.CreateOutputFile(Args.OutputModelFile)) TrainUtils.SaveModel(Host, ch, file, predictor, data, cmd); }
// Trains the stacking meta-model: runs every base model's value mapper over each
// cursored example (in parallel across models, sequential across rows), collects
// the base predictions as the meta-features and the original label, builds an
// in-memory data view from them, and trains BasePredictorType on it to produce
// Meta. Growable parallel arrays `labels`/`features` start at 100 and are resized
// per row, then trimmed to `count` before building the view.
// NOTE(review): the Parallel.For closure reads cursor.Features concurrently from
// all model lambdas; presumably VBuffer reads are safe here — confirm.
// NOTE(review): a trainer that wants normalization only triggers a warning; the
// stacked features are fed to it unnormalized.
public void Train(List <FeatureSubsetModel <IPredictorProducing <TOutput> > > models, RoleMappedData data, IHostEnvironment env) { Contracts.CheckValue(env, nameof(env)); var host = env.Register(Stacking.LoadName); host.CheckValue(models, nameof(models)); host.CheckValue(data, nameof(data)); using (var ch = host.Start("Training stacked model")) { ch.Check(Meta == null, "Train called multiple times"); ch.Check(BasePredictorType != null); var maps = new ValueMapper <VBuffer <Single>, TOutput> [models.Count]; for (int i = 0; i < maps.Length; i++) { Contracts.Assert(models[i].Predictor is IValueMapper); var m = (IValueMapper)models[i].Predictor; maps[i] = m.GetMapper <VBuffer <Single>, TOutput>(); } // REVIEW: Should implement this better.... var labels = new Single[100]; var features = new VBuffer <Single> [100]; int count = 0; // REVIEW: Should this include bad values or filter them? using (var cursor = new FloatLabelCursor(data, CursOpt.AllFeatures | CursOpt.AllLabels)) { TOutput[] predictions = new TOutput[maps.Length]; var vBuffers = new VBuffer <Single> [maps.Length]; while (cursor.MoveNext()) { Parallel.For(0, maps.Length, i => { var model = models[i]; if (model.SelectedFeatures != null) { EnsembleUtils.SelectFeatures(ref cursor.Features, model.SelectedFeatures, model.Cardinality, ref vBuffers[i]); maps[i](ref vBuffers[i], ref predictions[i]); } else { maps[i](ref cursor.Features, ref predictions[i]); } }); Utils.EnsureSize(ref labels, count + 1); Utils.EnsureSize(ref features, count + 1); labels[count] = cursor.Label; FillFeatureBuffer(predictions, ref features[count]); count++; } } ch.Info("The number of instances used for stacking trainer is {0}", count); var bldr = new ArrayDataViewBuilder(host); Array.Resize(ref labels, count); Array.Resize(ref features, count); bldr.AddColumn(DefaultColumnNames.Label, NumberType.Float, labels); bldr.AddColumn(DefaultColumnNames.Features, NumberType.Float, features); var view = bldr.GetDataView(); var rmd = new 
RoleMappedData(view, DefaultColumnNames.Label, DefaultColumnNames.Features); var trainer = BasePredictorType.CreateComponent(host); if (trainer.Info.NeedNormalization) { ch.Warning("The trainer specified for stacking wants normalization, but we do not currently allow this."); } Meta = trainer.Train(rmd); CheckMeta(); } }