Exemple #1
0
        public void CanTrainSupervised()
        {
            // Training fixture; the same name is expected back in the debug dump below.
            const string trainingFile = "cooking.train.txt";

            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            // Output path is given without an extension; the expected artifacts add ".bin" / ".vec".
            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            // Both argument objects are left at their defaults.
            var trainArgs    = new SupervisedArgs();
            var autotuneArgs = new AutotuneArgs();

            // Final argument enables debug output.
            fastText.Supervised(trainingFile, modelBase, trainArgs, autotuneArgs, true);

            // The wrapper must report a loaded model that points at the .bin artifact.
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(100);
            fastText.ModelPath.Should().Be(binFile);

            AssertLabels(fastText.GetLabels());

            // Both artifacts must exist on disk.
            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();

            // NOTE(review): "_train.txt" is presumably the dump written by the debug-enabled call above — confirm.
            var dump = DebugArgs.Load("_train.txt");

            // The recorded arguments must match what was passed in, in both recorded forms.
            AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
            AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(autotuneArgs, dump.ExternalTune);
            AssertAutotuneArgs(autotuneArgs, dump.ConvertedTune);

            // Input and output paths must be recorded unchanged.
            dump.ExternalInput.Should().Be(trainingFile);
            dump.ConvertedInput.Should().Be(trainingFile);
            dump.ExternalOutput.Should().Be(modelBase);
            dump.ConvertedOutput.Should().Be(modelBase);
        }
Exemple #2
0
 /// <summary>
 /// Asserts that two <see cref="AutotuneArgs"/> instances carry the same settings.
 /// For the string-valued settings, <c>null</c> and the empty string are treated as equal.
 /// </summary>
 /// <param name="expected">The arguments that were supplied.</param>
 /// <param name="actual">The arguments that were read back.</param>
 private void AssertAutotuneArgs(AutotuneArgs expected, AutotuneArgs actual)
 {
     // Collapses null to an empty string so that both spellings of "not set" compare equal.
     string OrEmpty(string value) => value ?? string.Empty;

     OrEmpty(actual.ValidationFile).Should().Be(OrEmpty(expected.ValidationFile));
     OrEmpty(actual.Metric).Should().Be(OrEmpty(expected.Metric));

     actual.Predictions.Should().Be(expected.Predictions);
     actual.Duration.Should().Be(expected.Duration);

     OrEmpty(actual.ModelSize).Should().Be(OrEmpty(expected.ModelSize));
 }
Exemple #3
0
        public void CanAutotuneSupervisedModel()
        {
            // Training fixture; the same name is expected back in the debug dump below.
            const string trainingFile = "cooking.train.txt";

            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            // Output path is given without an extension; the expected artifacts add ".bin" / ".vec".
            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            // Every training setting is given an explicit value so the debug-dump comparison
            // below checks each of them against what was passed in.
            var trainArgs = new SupervisedArgs
            {
                bucket = 2100000,
                dim = 250,
                epoch = 10,
                loss = LossName.HierarchicalSoftmax,
                lr = 0.5,
                maxn = 5,
                minn = 2,
                neg = 6,
                seed = 42,
                t = 0.0002,
                thread = 10,
                verbose = 1,
                ws = 6,
                minCount = 2,
                saveOutput = true,
                wordNgrams = 2,
                lrUpdateRate = 110,
                minCountLabel = 1
            };

            // Autotune settings, including a validation file.
            var tuning = new AutotuneArgs
            {
                Duration = 30,
                Metric = "precisionAtRecall:30",
                Predictions = 2,
                ValidationFile = "cooking.valid.txt"
            };

            // Final argument enables debug output.
            fastText.Supervised(trainingFile, modelBase, trainArgs, tuning, true);

            // The wrapper must report a loaded model with the requested dimension.
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(250);
            fastText.ModelPath.Should().Be(binFile);

            // Both artifacts must exist on disk.
            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();

            // NOTE(review): "_train.txt" is presumably the dump written by the debug-enabled call above — confirm.
            var dump = DebugArgs.Load("_train.txt");

            // The recorded arguments must match what was passed in, in both recorded forms.
            AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
            AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(tuning, dump.ExternalTune);
            AssertAutotuneArgs(tuning, dump.ConvertedTune);

            // Input and output paths must be recorded unchanged.
            dump.ExternalInput.Should().Be(trainingFile);
            dump.ConvertedInput.Should().Be(trainingFile);
            dump.ExternalOutput.Should().Be(modelBase);
            dump.ConvertedOutput.Should().Be(modelBase);
        }
Exemple #4
0
        public void CanTrainSupervisedWithRelativeOutput()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            var args     = new SupervisedArgs();
            var tuneArgs = new AutotuneArgs();

            try
            {
                // The output path is relative ("cooking"), so the artifacts land in the current
                // working directory rather than in the per-test temp directory.
                fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

                fastText.IsModelReady().Should().BeTrue();
                fastText.GetModelDimension().Should().Be(100);
                fastText.ModelPath.Should().Be("cooking.bin");

                AssertLabels(fastText.GetLabels());

                File.Exists("cooking.bin").Should().BeTrue();
                File.Exists("cooking.vec").Should().BeTrue();
            }
            finally
            {
                // Clean up even when training or an assertion fails; otherwise stale artifacts stay
                // in the working directory and can satisfy the File.Exists checks of a later run.
                // File.Delete does not throw when the file is absent.
                File.Delete("cooking.bin");
                File.Delete("cooking.vec");
            }
        }
Exemple #5
0
    /// <summary>
    /// Trains a supervised model from <paramref name="inputPath"/>. When
    /// <see cref="AutotuneArgs.ValidationFile"/> is set, an automated hyperparameter search is performed.
    /// </summary>
    /// <param name="inputPath">Path to a training set.</param>
    /// <param name="outputPath">Path to write the model to (excluding extension).</param>
    /// <param name="args">
    /// Training arguments. A plain <see cref="SupervisedArgs"/> trains a supervised model;
    /// a <see cref="QuantizedSupervisedArgs"/> additionally quantizes the model after training.
    /// </param>
    /// <param name="autotuneArgs">Autotune arguments.</param>
    /// <param name="debug">Whether to write debug info.</param>
    /// <exception cref="InvalidOperationException">
    /// <see cref="AutotuneArgs.ModelSize"/> is set but <paramref name="args"/> is not a
    /// <see cref="QuantizedSupervisedArgs"/>.
    /// </exception>
    /// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
    internal void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, bool debug)
    {
        ValidatePaths(inputPath, outputPath, args.PretrainedVectors);

        // The model type is forced to supervised further down; only warn about the mismatch here.
        bool modelTypeMismatch = args.model != ModelName.Supervised;
        if (modelTypeMismatch)
        {
            _logger?.LogWarning($"{args.model} model type specified in a Supervised() call. Model type will be changed to Supervised.");
        }

        var quantArgs         = args as QuantizedSupervisedArgs;
        bool wantsQuantization = quantArgs != null;
        bool hasSizeTarget     = !string.IsNullOrEmpty(autotuneArgs.ModelSize);

        // A model size target requires quantization arguments.
        if (hasSizeTarget && !wantsQuantization)
        {
            throw new InvalidOperationException("You specified model size in autotuneArgs, but passed SupervisedArgs instance. Pass QuantizedSupervisedArgs instead.");
        }

        // Quantized args without a size target: train first with no output path, then run the
        // separate Quantize step against the real output path.
        bool quantizeAfterTraining = wantsQuantization && !hasSizeTarget;
        string trainOutputPath     = quantizeAfterTraining ? null : outputPath;

        var nativeArgs = _mapper.Map <FastTextArgsStruct>(args);
        nativeArgs.model = model_name.sup;

        var nativeTune = _mapper.Map <AutotuneArgsStruct>(autotuneArgs);

        var trainResult = Train(
            _fastText,
            inputPath,
            trainOutputPath,
            nativeArgs,
            nativeTune,
            args.TrainProgressCallback,
            autotuneArgs.AutotuneProgressCallback,
            args.LabelPrefix,
            args.PretrainedVectors,
            debug);

        CheckForErrors(trainResult);

        if (quantizeAfterTraining)
        {
            Quantize(quantArgs, outputPath);
            return;
        }

        // Training wrote the model itself: record the label length and where the model lives.
        _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));
        ModelPath    = AdjustPath(outputPath, !string.IsNullOrEmpty(autotuneArgs.ModelSize));
    }
Exemple #6
0
 /// <summary>
 /// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
 /// hyperparameter search will be performed.
 /// </summary>
 /// <param name="inputPath">Path to a training set.</param>
 /// <param name="outputPath">Path to write the model to (excluding extension).</param>
 /// <param name="args">
 /// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
 /// If <see cref="QuantizedSupervisedArgs"/> is passed, model will be quantized after training.
 /// </param>
 /// <param name="autotuneArgs">Autotune arguments.</param>
 /// <param name="progressCallback">
 /// Optional progress callback. It is used only when <paramref name="args"/> does not already carry
 /// its own <c>TrainProgressCallback</c>; a callback set on <paramref name="args"/> takes precedence.
 /// </param>
 /// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
 public void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, TrainProgressCallback progressCallback = null)
 {
     // Nothing to forward, or args already has a callback: delegate unchanged (debug output off).
     if (progressCallback == null || args.TrainProgressCallback != null)
     {
         Supervised(inputPath, outputPath, args, autotuneArgs, false);
         return;
     }

     // The internal overload hands args.TrainProgressCallback to the trainer, so the callback is
     // routed through args for the duration of the call and then removed again, leaving the
     // caller's object as it was.
     // NOTE(review): assumes args.TrainProgressCallback is a settable member of the
     // TrainProgressCallback delegate type — confirm against the args class.
     args.TrainProgressCallback = progressCallback;

     try
     {
         Supervised(inputPath, outputPath, args, autotuneArgs, false);
     }
     finally
     {
         args.TrainProgressCallback = null;
     }
 }