Beispiel #1
0
        /// <summary>
        /// Verifies that a plain supervised training run produces a ready model,
        /// the expected output files, and arguments that survive the
        /// managed-to-native round trip.
        /// </summary>
        public void CanTrainSupervised()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
            string modelPath = Path.Combine(_tempDir, "cooking");

            var supervisedArgs = new SupervisedArgs();
            var autotuneArgs   = new AutotuneArgs();

            fastText.Supervised("cooking.train.txt", modelPath, supervisedArgs, autotuneArgs, true);

            // Model should be loaded; 100 is the expected default dimension.
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(100);
            fastText.ModelPath.Should().Be(modelPath + ".bin");

            AssertLabels(fastText.GetLabels());

            // Both the main model and the word-vector file must exist on disk.
            File.Exists(modelPath + ".bin").Should().BeTrue();
            File.Exists(modelPath + ".vec").Should().BeTrue();

            // The debug dump lets us compare what we passed against what the
            // native layer actually received.
            var debugArgs = DebugArgs.Load("_train.txt");

            AssertSupervisedArgs(supervisedArgs, debugArgs.ExternalArgs);
            AssertSupervisedArgs(supervisedArgs, debugArgs.ConvertedArgs);
            AssertAutotuneArgs(autotuneArgs, debugArgs.ExternalTune);
            AssertAutotuneArgs(autotuneArgs, debugArgs.ConvertedTune);

            debugArgs.ExternalInput.Should().Be("cooking.train.txt");
            debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
            debugArgs.ExternalOutput.Should().Be(modelPath);
            debugArgs.ConvertedOutput.Should().Be(modelPath);
        }
Beispiel #2
0
        /// <summary>
        /// Checks that the training progress callback fires at least once during
        /// supervised training and that the model is still produced normally.
        /// </summary>
        public void CanTrainSupervisedWithProgressCallback()
        {
            using var fastText = new FastTextWrapper();
            string modelPath     = Path.Combine(_tempDir, "cooking");
            int    progressCalls = 0;

            // Count callback invocations; the exact count is timing-dependent,
            // so we only assert it happened at all.
            var trainArgs = new SupervisedArgs
            {
                TrainProgressCallback = (progress, loss, wst, lr, eta) => progressCalls++
            };

            fastText.Supervised("cooking.train.txt", modelPath, trainArgs);

            progressCalls.Should().BeGreaterThan(0);
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(100);
            fastText.ModelPath.Should().Be(modelPath + ".bin");

            AssertLabels(fastText.GetLabels());

            File.Exists(modelPath + ".bin").Should().BeTrue();
            File.Exists(modelPath + ".vec").Should().BeTrue();
        }
Beispiel #3
0
        /// <summary>
        /// Trains a supervised model with a full set of non-default hyperparameters
        /// plus autotuning, and verifies the arguments reach the native layer intact.
        /// </summary>
        public void CanAutotuneSupervisedModel()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
            string modelPath = Path.Combine(_tempDir, "cooking");

            // Deliberately non-default values so any round-trip mismatch is visible.
            var trainArgs = new SupervisedArgs();

            trainArgs.bucket        = 2100000;
            trainArgs.dim           = 250;
            trainArgs.epoch         = 10;
            trainArgs.loss          = LossName.HierarchicalSoftmax;
            trainArgs.lr            = 0.5;
            trainArgs.maxn          = 5;
            trainArgs.minn          = 2;
            trainArgs.neg           = 6;
            trainArgs.seed          = 42;
            trainArgs.t             = 0.0002;
            trainArgs.thread        = 10;
            trainArgs.verbose       = 1;
            trainArgs.ws            = 6;
            trainArgs.minCount      = 2;
            trainArgs.saveOutput    = true;
            trainArgs.wordNgrams    = 2;
            trainArgs.lrUpdateRate  = 110;
            trainArgs.minCountLabel = 1;

            var tuneArgs = new AutotuneArgs();

            tuneArgs.Duration       = 30;
            tuneArgs.Metric         = "precisionAtRecall:30";
            tuneArgs.Predictions    = 2;
            tuneArgs.ValidationFile = "cooking.valid.txt";

            fastText.Supervised("cooking.train.txt", modelPath, trainArgs, tuneArgs, true);

            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(250);
            fastText.ModelPath.Should().Be(modelPath + ".bin");

            File.Exists(modelPath + ".bin").Should().BeTrue();
            File.Exists(modelPath + ".vec").Should().BeTrue();

            // Compare what we passed with what the native layer actually received.
            var debugArgs = DebugArgs.Load("_train.txt");

            AssertSupervisedArgs(trainArgs, debugArgs.ExternalArgs);
            AssertSupervisedArgs(trainArgs, debugArgs.ConvertedArgs);
            AssertAutotuneArgs(tuneArgs, debugArgs.ExternalTune);
            AssertAutotuneArgs(tuneArgs, debugArgs.ConvertedTune);

            debugArgs.ExternalInput.Should().Be("cooking.train.txt");
            debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
            debugArgs.ExternalOutput.Should().Be(modelPath);
            debugArgs.ConvertedOutput.Should().Be(modelPath);
        }
Beispiel #4
0
        /// <summary>
        /// Console demo: trains a supervised model on the cooking dataset,
        /// validates it on the test set and renders a precision-recall plot.
        /// </summary>
        static void Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                         .MinimumLevel.Debug()
                         .WriteTo.Console(theme: ConsoleTheme.None)
                         .CreateLogger();

            var log     = Log.ForContext<Program>();
            var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));

            Directory.CreateDirectory(tempDir);

            log.Information($"Temp dir: {tempDir}");

            string outPath = Path.Combine(tempDir, "cooking.bin");

            // FIX: the wrapper was never disposed. Dispose it (and any native
            // resources) before tempDir is removed, otherwise open model files
            // could make Directory.Delete fail.
            using (var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() })))
            {
                var ftArgs = new SupervisedArgs
                {
                    epoch      = 15,
                    lr         = 1,
                    dim        = 300,
                    wordNgrams = 2,
                    minn       = 3,
                    maxn       = 6
                };

                fastText.Supervised("cooking.train.txt", outPath, ftArgs);

                // Best-effort cleanup of a stale debug dump; ignore failures
                // (e.g. the file is locked) — a leftover file is not fatal.
                try
                {
                    File.Delete("_debug.txt");
                }
                catch
                {
                }

                var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);

                log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                                $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                                $"\n\tF1: {result.GlobalMetrics.GetF1()}");

                var curve = result.GetPrecisionRecallCurve();

                var (_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());

                string plotPath = PlotCurves(tempDir, new[] { curve, debugCurve });

                log.Information($"Precision-Recall plot: {plotPath}");
            }

            Console.WriteLine("\nPress any key to exit.");
            Console.ReadKey();

            Directory.Delete(tempDir, true);
        }
Beispiel #5
0
 /// <summary>
 /// Runs supervised training on <paramref name="trainFile"/> with a fixed set
 /// of tuned hyperparameters, writing the model to <paramref name="modelFile"/>.
 /// </summary>
 private static void TrainSupervised(FastTextWrapper fastText, string trainFile, string modelFile)
 {
     var trainingArgs = SupervisedArgs.SupervisedDefaults(opts =>
     {
         opts.Epochs       = 25;
         opts.LearningRate = 1.0;
         opts.WordNGrams   = 3;
         opts.Verbose      = 2;
         opts.LabelPrefix  = "__label__";
     });

     fastText.Train(trainFile, modelFile, trainingArgs);
 }
Beispiel #6
0
        /// <summary>
        /// A freshly constructed <see cref="SupervisedArgs"/> exposes the expected
        /// supervised-mode defaults.
        /// </summary>
        public void CanGetDefaultSupervisedArgs()
        {
            var defaults = new SupervisedArgs();

            defaults.model.Should().Be(ModelName.Supervised);
            defaults.loss.Should().Be(LossName.Softmax);
            defaults.bucket.Should().Be(2000000);
            defaults.dim.Should().Be(100);
            defaults.minCount.Should().Be(1);
            defaults.minn.Should().Be(0);
            defaults.maxn.Should().Be(0);
            // Floating-point default compared with a tolerance.
            defaults.lr.Should().BeApproximately(0.1d, 10e-5);
        }
Beispiel #7
0
        /// <summary>
        /// Training with pretrained vectors whose dimension differs from the model
        /// dimension must fail with a <see cref="NativeLibraryException"/>.
        /// </summary>
        public void CantTrainSupervisedWithPretrainedVectorsWithDifferentDimension()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelPath = Path.Combine(_tempDir, "cooking");

            // 300-dimensional vectors against the default model dimension.
            var trainArgs = new SupervisedArgs { PretrainedVectors = "cooking.unsup.300.vec" };

            fastText.Invoking(x => x.Supervised("cooking.train.txt", modelPath, trainArgs))
                    .Should().Throw <NativeLibraryException>()
                    .WithMessage("Dimension of pretrained vectors (300) does not match dimension (100)!");
        }
Beispiel #8
0
    /// <summary>
    /// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
    /// hyperparameter search will be performed.
    /// </summary>
    /// <param name="inputPath">Path to a training set.</param>
    /// <param name="outputPath">Path to write the model to (excluding extension).</param>
    /// <param name="args">
    /// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
    /// If <see cref="QuantizedSupervisedArgs"/> is passed, model will be quantized after training.
    /// </param>
    /// <param name="autotuneArgs">Autotune arguments.</param>
    /// <param name="debug">Whether to write debug info.</param>
    /// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
    internal void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, bool debug)
    {
        ValidatePaths(inputPath, outputPath, args.PretrainedVectors);

        // The model type is forced to supervised below (argsStruct.model),
        // so a mismatched setting is only worth a warning, not an error.
        if (args.model != ModelName.Supervised)
        {
            _logger?.LogWarning($"{args.model} model type specified in a Supervised() call. Model type will be changed to Supervised.");
        }

        var quantizedArgs = args as QuantizedSupervisedArgs;

        // A target model size is only meaningful when quantization args were passed.
        if (!string.IsNullOrEmpty(autotuneArgs.ModelSize) && quantizedArgs == null)
        {
            throw new InvalidOperationException("You specified model size in autotuneArgs, but passed SupervisedArgs instance. Pass QuantizedSupervisedArgs instead.");
        }

        // Quantization requested without an autotuned model size: we quantize
        // manually after training instead of letting autotune handle it.
        bool quantizeWithNoQuantTune = quantizedArgs != null && string.IsNullOrEmpty(autotuneArgs.ModelSize);

        // Map managed argument objects to the native interop structs.
        var argsStruct = _mapper.Map <FastTextArgsStruct>(args);

        argsStruct.model = model_name.sup;

        var autotuneStruct = _mapper.Map <AutotuneArgsStruct>(autotuneArgs);

        // NOTE(review): a null output path presumably suppresses the native save
        // so only the quantized model is written afterwards — confirm against the
        // native Train() contract.
        CheckForErrors(Train(
                           _fastText,
                           inputPath,
                           quantizeWithNoQuantTune ? null : outputPath,
                           argsStruct,
                           autotuneStruct,
                           args.TrainProgressCallback,
                           autotuneArgs.AutotuneProgressCallback,
                           args.LabelPrefix,
                           args.PretrainedVectors,
                           debug));

        if (quantizeWithNoQuantTune)
        {
            // Quantize() is expected to load the model and set ModelPath itself.
            Quantize(quantizedArgs, outputPath);
        }
        else
        {
            _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));
            ModelPath    = AdjustPath(outputPath, !string.IsNullOrEmpty(autotuneArgs.ModelSize));
        }
    }
Beispiel #9
0
        /// <summary>
        /// Supervised training succeeds with pretrained vectors when the model
        /// dimension is set to match the vector dimension (300).
        /// </summary>
        public void CanTrainSupervisedWithPretrainedVectors()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelPath = Path.Combine(_tempDir, "cooking");

            var trainArgs = new SupervisedArgs
            {
                PretrainedVectors = "cooking.unsup.300.vec",
                dim               = 300
            };

            fastText.Supervised("cooking.train.txt", modelPath, trainArgs, new AutotuneArgs(), true);

            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(300);
            fastText.ModelPath.Should().Be(modelPath + ".bin");

            AssertLabels(fastText.GetLabels());

            File.Exists(modelPath + ".bin").Should().BeTrue();
            File.Exists(modelPath + ".vec").Should().BeTrue();
        }
Beispiel #10
0
        /// <summary>
        /// A relative (non-rooted) output path is honored: the model files are
        /// written relative to the working directory and cleaned up afterwards.
        /// </summary>
        public void CanTrainSupervisedWithRelativeOutput()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            var trainArgs = new SupervisedArgs();
            var tuneArgs  = new AutotuneArgs();

            fastText.Supervised("cooking.train.txt", "cooking", trainArgs, tuneArgs, true);

            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(100);
            fastText.ModelPath.Should().Be("cooking.bin");

            AssertLabels(fastText.GetLabels());

            File.Exists("cooking.bin").Should().BeTrue();
            File.Exists("cooking.vec").Should().BeTrue();

            // The model landed in the working directory — remove it so later
            // runs are not affected.
            File.Delete("cooking.bin");
            File.Delete("cooking.vec");
        }
Beispiel #11
0
 /// <summary>
 /// Asserts that every training-relevant field of <paramref name="actual"/>
 /// matches <paramref name="expected"/>. Nullable string fields are compared
 /// with null coerced to an empty string.
 /// </summary>
 private void AssertSupervisedArgs(SupervisedArgs expected, SupervisedArgs actual)
 {
     // Optimization parameters.
     actual.lr.Should().Be(expected.lr);
     actual.lrUpdateRate.Should().Be(expected.lrUpdateRate);
     actual.epoch.Should().Be(expected.epoch);
     actual.loss.Should().Be(expected.loss);
     actual.neg.Should().Be(expected.neg);
     actual.t.Should().Be(expected.t);
     actual.seed.Should().Be(expected.seed);

     // Model shape.
     actual.model.Should().Be(expected.model);
     actual.dim.Should().Be(expected.dim);
     actual.ws.Should().Be(expected.ws);
     actual.bucket.Should().Be(expected.bucket);
     actual.wordNgrams.Should().Be(expected.wordNgrams);
     actual.minn.Should().Be(expected.minn);
     actual.maxn.Should().Be(expected.maxn);

     // Vocabulary, runtime and output settings.
     actual.minCount.Should().Be(expected.minCount);
     actual.minCountLabel.Should().Be(expected.minCountLabel);
     actual.thread.Should().Be(expected.thread);
     actual.verbose.Should().Be(expected.verbose);
     actual.saveOutput.Should().Be(expected.saveOutput);
     (actual.LabelPrefix ?? "").Should().Be(expected.LabelPrefix ?? "");
     (actual.PretrainedVectors ?? "").Should().Be(expected.PretrainedVectors ?? "");
 }
 /// <summary>
 /// Trains a supervised model on the local cooking dataset using tuned defaults.
 /// </summary>
 private static void TrainSupervised()
 {
     using var fastText = new FastTextWrapper();

     var trainingArgs = SupervisedArgs.SupervisedDefaults(opts =>
     {
         opts.Epochs       = 25;
         opts.LearningRate = 1.0;
         opts.WordNGrams   = 3;
         opts.Verbose      = 2;
         opts.LabelPrefix  = "__label__";
     });

     fastText.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking", trainingArgs);
 }
Beispiel #13
0
 /// <summary>
 /// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
 /// hyperparameter search will be performed.
 /// </summary>
 /// <param name="inputPath">Path to a training set.</param>
 /// <param name="outputPath">Path to write the model to (excluding extension).</param>
 /// <param name="args">
 /// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
 /// If <see cref="QuantizedSupervisedArgs"/> is passed, model will be quantized after training.
 /// </param>
 /// <param name="autotuneArgs">Autotune arguments.</param>
 /// <param name="progressCallback">
 /// Optional progress callback. NOTE(review): this parameter is never forwarded —
 /// the delegated call below ignores it. Progress reporting appears to have moved
 /// to <see cref="SupervisedArgs.TrainProgressCallback"/>; confirm whether this
 /// overload should wire the callback through or be marked [Obsolete].
 /// </param>
 /// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
 public void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, TrainProgressCallback progressCallback = null)
 {
     // Delegates with debug = false; progressCallback is dropped here — see the
     // NOTE on the parameter documentation above.
     Supervised(inputPath, outputPath, args, autotuneArgs, false);
 }
    /// <summary>
    /// Console demo with a Spectre.Console progress bar: trains a supervised
    /// model, validates it on the test set and renders a precision-recall plot.
    /// </summary>
    static void Main(string[] args)
    {
        Log.Logger = new LoggerConfiguration()
                     .MinimumLevel.Debug()
                     .WriteTo.Console(theme: ConsoleTheme.None)
                     .CreateLogger();

        var log     = Log.ForContext<Program>();
        var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));

        Directory.CreateDirectory(tempDir);

        log.Information($"Temp dir: {tempDir}");

        string outPath = Path.Combine(tempDir, "cooking.bin");

        // FIX: the wrapper was never disposed. Dispose it (and any native
        // resources) before tempDir is removed, otherwise open model files
        // could make Directory.Delete fail.
        using (var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() })))
        {
            AnsiConsole.Progress()
                       .Start(ctx =>
                       {
                           var task   = ctx.AddTask("Training");
                           var ftArgs = new SupervisedArgs
                           {
                               epoch                 = 15,
                               lr                    = 1,
                               dim                   = 300,
                               wordNgrams            = 2,
                               minn                  = 3,
                               maxn                  = 6,
                               // Native logging is silenced; the progress bar is
                               // the only training output.
                               verbose               = 0,
                               TrainProgressCallback = (progress, loss, wst, lr, eta) =>
                               {
                                   task.Value       = Math.Ceiling(progress * 100);
                                   task.Description = $"Loss: {loss:N3}, words/thread/sec: {wst}, LR: {lr:N5}, ETA: {eta}";
                               }
                           };

                           fastText.Supervised("cooking.train.txt", outPath, ftArgs);
                       });

            // Best-effort cleanup of a stale debug dump; ignore failures
            // (e.g. the file is locked) — a leftover file is not fatal.
            try
            {
                File.Delete("_debug.txt");
            }
            catch
            {
            }

            log.Information("Validating model on the test set");

            var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);

            log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                            $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                            $"\n\tF1: {result.GlobalMetrics.GetF1()}");

            var curve = result.GetPrecisionRecallCurve();

            var (_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());

            string plotPath = PlotCurves(tempDir, new[] { curve, debugCurve });

            log.Information($"Precision-Recall plot: {plotPath}");
        }

        Console.WriteLine("\nPress any key to exit.");
        Console.ReadKey();

        Directory.Delete(tempDir, true);
    }