コード例 #1
0
        private static IDataScorerTransform _TrainSentiment()
        {
            bool normalize = true;

            var args = new TextLoader.Options()
            {
                Separators = new[] { '\t' },
                HasHeader  = true,
                Columns    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Boolean, 0),
                    new TextLoader.Column("SentimentText", DataKind.String, 1)
                }
            };

            var args2 = new TextFeaturizingEstimator.Options()
            {
                KeepDiacritics         = false,
                KeepPunctuations       = false,
                CaseMode               = TextNormalizingEstimator.CaseMode.Lower,
                OutputTokensColumnName = "tokens",
                Norm = normalize ? TextFeaturizingEstimator.NormFunction.L2 : TextFeaturizingEstimator.NormFunction.None,
                CharFeatureExtractor = new WordBagEstimator.Options()
                {
                    NgramLength = 3, UseAllLengths = false
                },
                WordFeatureExtractor = new WordBagEstimator.Options()
                {
                    NgramLength = 2, UseAllLengths = true
                },
            };

            var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            /*using (*/
            var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1);
            {
                // Pipeline
                var loader = new TextLoader(env, args).Load(new MultiFileSource(trainFilename));

                var trans = TextFeaturizingEstimator.Create(env, args2, loader);

                // Train
                var trainer = new SdcaLogisticRegressionBinaryTrainer(env, new SdcaLogisticRegressionBinaryTrainer.Options
                {
                    LabelColumnName   = "Label",
                    FeatureColumnName = "Features"
                });

                var cached    = new Microsoft.ML.Data.CacheDataView(env, trans, prefetch: null);
                var predictor = trainer.Fit(cached);

                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema));
            }
        }
コード例 #2
0
 /// <summary>
 /// Creation of the pipeline knowing parameters _inDataFrame, _cacheFile, _reuse.
 /// </summary>
 protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
 {
     if (_inDataFrame)
     {
         if (_async)
         {
             var view = new CacheDataView(env, input, null);
             var tr   = new PassThroughTransform(env, new PassThroughTransform.Arguments(), view);
             return(tr);
         }
         else
         {
             var args = new SortInDataFrameTransform.Arguments()
             {
                 numThreads = _numThreads, sortColumn = null
             };
             var tr = new SortInDataFrameTransform(env, args, input);
             return(tr);
         }
     }
     else
     {
         string nt = _numThreads > 0 ? string.Format("{{t={0}}}", _numThreads) : string.Empty;
         using (var ch = Host.Start("Caching data..."))
         {
             if (_reuse && File.Exists(_cacheFile))
             {
                 ch.Info(MessageSensitivity.UserData, "Reusing cache '{0}'", _cacheFile);
             }
             else
             {
                 ch.Info(MessageSensitivity.UserData, "Building cache '{0}'", _cacheFile);
                 var saver = ComponentCreation.CreateSaver(env, _saverSettings);
                 using (var fs0 = Host.CreateOutputFile(_cacheFile))
                     DataSaverUtils.SaveDataView(ch, saver, input, fs0, true);
             }
         }
         var loader = ComponentCreation.CreateLoader(env, string.Format("binary{{{0}}}", nt),
                                                     new MultiFileSource(_cacheFile));
         SchemaHelper.CheckSchema(Host, input.Schema, loader.Schema);
         var copy = ComponentCreation.CreateTransform(env, "skip{s=0}", loader);
         return(copy);
     }
 }
コード例 #3
0
        private static IDataScorerTransform _TrainSentiment()
        {
            bool normalize = true;

            var args = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("Label", DataKind.BL, 0),
                    new TextLoader.Column("SentimentText", DataKind.Text, 1)
                }
            };

            var args2 = new TextFeaturizingEstimator.Arguments()
            {
                Column = new TextFeaturizingEstimator.Column
                {
                    Name   = "Features",
                    Source = new[] { "SentimentText" }
                },
                KeepDiacritics               = false,
                KeepPunctuations             = false,
                TextCase                     = TextNormalizingEstimator.CaseNormalizationMode.Lower,
                OutputTokens                 = true,
                UsePredefinedStopWordRemover = true,
                VectorNormalizer             = normalize ? TextFeaturizingEstimator.TextNormKind.L2 : TextFeaturizingEstimator.TextNormKind.None,
                CharFeatureExtractor         = new NgramExtractorTransform.NgramExtractorArguments()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments()
                {
                    NgramLength = 2, AllLengths = true
                },
            };

            var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            using (var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1))
            {
                // Pipeline
                var loader = new TextLoader(env, args).Read(new MultiFileSource(trainFilename));

                var trans = TextFeaturizingEstimator.Create(env, args2, loader);

                // Train
                var trainer = new SdcaBinaryTrainer(env, new SdcaBinaryTrainer.Arguments
                {
                    NumThreads    = 1,
                    LabelColumn   = "Label",
                    FeatureColumn = "Features"
                });

                var cached    = new Microsoft.ML.Data.CacheDataView(env, trans, prefetch: null);
                var predictor = trainer.Fit(cached);

                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema));
            }
        }