/// <summary>
/// Trains a supervised model with default <see cref="SupervisedArgs"/> and <see cref="AutotuneArgs"/>
/// into the temp directory, then checks the wrapper state, the produced model files and the
/// debug dump loaded from <c>_train.txt</c>.
/// </summary>
public void CanTrainSupervised()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string modelBasePath = Path.Combine(_tempDir, "cooking");
    string binPath = modelBasePath + ".bin";
    string vecPath = modelBasePath + ".vec";

    var trainingArgs = new SupervisedArgs();
    var autotuneArgs = new AutotuneArgs();

    // The last argument enables debug output, which is inspected via DebugArgs below.
    wrapper.Supervised("cooking.train.txt", modelBasePath, trainingArgs, autotuneArgs, true);

    // Wrapper state after training.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binPath);
    AssertLabels(wrapper.GetLabels());

    // Both model files must exist on disk.
    File.Exists(binPath).Should().BeTrue();
    File.Exists(vecPath).Should().BeTrue();

    // NOTE(review): "_train.txt" is presumably written by the training call when debug is on — confirm.
    var dump = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(trainingArgs, dump.ExternalArgs);
    AssertSupervisedArgs(trainingArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(autotuneArgs, dump.ExternalTune);
    AssertAutotuneArgs(autotuneArgs, dump.ConvertedTune);

    dump.ExternalInput.Should().Be("cooking.train.txt");
    dump.ConvertedInput.Should().Be("cooking.train.txt");
    dump.ExternalOutput.Should().Be(modelBasePath);
    dump.ConvertedOutput.Should().Be(modelBasePath);
}
/// <summary>
/// Asserts that <paramref name="actual"/> carries the same autotune settings as
/// <paramref name="expected"/>. For <c>ValidationFile</c>, <c>Metric</c> and <c>ModelSize</c>
/// a <c>null</c> value is treated as an empty string, so null and "" compare equal.
/// </summary>
/// <param name="expected">Arguments that were passed to training.</param>
/// <param name="actual">Arguments to verify against <paramref name="expected"/>.</param>
private void AssertAutotuneArgs(AutotuneArgs expected, AutotuneArgs actual)
{
    (actual.ValidationFile ?? string.Empty)
        .Should()
        .Be(expected.ValidationFile ?? string.Empty);

    (actual.Metric ?? string.Empty)
        .Should()
        .Be(expected.Metric ?? string.Empty);

    actual.Predictions
        .Should()
        .Be(expected.Predictions);

    actual.Duration
        .Should()
        .Be(expected.Duration);

    (actual.ModelSize ?? string.Empty)
        .Should()
        .Be(expected.ModelSize ?? string.Empty);
}
/// <summary>
/// Trains a supervised model with explicit training arguments and autotune settings
/// (validation file, metric, duration, predictions), then checks the wrapper state, the produced
/// model files and the debug dump loaded from <c>_train.txt</c>.
/// </summary>
public void CanAutotuneSupervisedModel()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string modelBasePath = Path.Combine(_tempDir, "cooking");
    string binPath = modelBasePath + ".bin";
    string vecPath = modelBasePath + ".vec";

    var trainingArgs = new SupervisedArgs
    {
        bucket = 2100000,
        dim = 250,
        epoch = 10,
        loss = LossName.HierarchicalSoftmax,
        lr = 0.5,
        maxn = 5,
        minn = 2,
        neg = 6,
        seed = 42,
        t = 0.0002,
        thread = 10,
        verbose = 1,
        ws = 6,
        minCount = 2,
        saveOutput = true,
        wordNgrams = 2,
        lrUpdateRate = 110,
        minCountLabel = 1
    };

    var tuning = new AutotuneArgs
    {
        Duration = 30,
        Metric = "precisionAtRecall:30",
        Predictions = 2,
        ValidationFile = "cooking.valid.txt"
    };

    // The last argument enables debug output, which is inspected via DebugArgs below.
    wrapper.Supervised("cooking.train.txt", modelBasePath, trainingArgs, tuning, true);

    // Wrapper state after training; the dimension must match the requested dim.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(250);
    wrapper.ModelPath.Should().Be(binPath);

    // Both model files must exist on disk.
    File.Exists(binPath).Should().BeTrue();
    File.Exists(vecPath).Should().BeTrue();

    // NOTE(review): "_train.txt" is presumably written by the training call when debug is on — confirm.
    var dump = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(trainingArgs, dump.ExternalArgs);
    AssertSupervisedArgs(trainingArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(tuning, dump.ExternalTune);
    AssertAutotuneArgs(tuning, dump.ConvertedTune);

    dump.ExternalInput.Should().Be("cooking.train.txt");
    dump.ConvertedInput.Should().Be("cooking.train.txt");
    dump.ExternalOutput.Should().Be(modelBasePath);
    dump.ConvertedOutput.Should().Be(modelBasePath);
}
/// <summary>
/// Trains a supervised model using a relative output path (<c>"cooking"</c>) and verifies
/// that the model is ready, that <c>ModelPath</c> is the relative <c>cooking.bin</c> and that both
/// model files were written relative to the current working directory.
/// </summary>
/// <remarks>
/// The produced files are removed in a <c>finally</c> block so that a failing assertion does not
/// leave stale <c>cooking.bin</c>/<c>cooking.vec</c> files behind, which could make the
/// <c>File.Exists</c> checks of a later run pass for the wrong reason.
/// </remarks>
public void CanTrainSupervisedWithRelativeOutput()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    try
    {
        fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

        fastText.IsModelReady().Should().BeTrue();
        fastText.GetModelDimension().Should().Be(100);
        fastText.ModelPath.Should().Be("cooking.bin");

        AssertLabels(fastText.GetLabels());

        File.Exists("cooking.bin").Should().BeTrue();
        File.Exists("cooking.vec").Should().BeTrue();
    }
    finally
    {
        // File.Delete is a no-op for a missing file, so this is safe even when training failed
        // before anything was written.
        File.Delete("cooking.bin");
        File.Delete("cooking.vec");
    }
}
/// <summary>
/// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
/// hyperparameter search will be performed.
/// </summary>
/// <param name="inputPath">Path to a training set.</param>
/// <param name="outputPath">Path to write the model to (excluding extension).</param>
/// <param name="args">
/// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
/// If <see cref="QuantizedSupervisedArgs"/> is passed, model will be quantized after training.
/// </param>
/// <param name="autotuneArgs">Autotune arguments.</param>
/// <param name="debug">Whether to write debug info.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="autotuneArgs"/> specifies a model size, but <paramref name="args"/> is not a
/// <see cref="QuantizedSupervisedArgs"/> instance.
/// </exception>
/// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
internal void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, bool debug)
{
    // No explicit callback here: training reports progress to args.TrainProgressCallback.
    SupervisedCore(inputPath, outputPath, args, autotuneArgs, debug, null);
}

/// <summary>
/// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
/// hyperparameter search will be performed.
/// </summary>
/// <param name="inputPath">Path to a training set.</param>
/// <param name="outputPath">Path to write the model to (excluding extension).</param>
/// <param name="args">
/// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
/// If <see cref="QuantizedSupervisedArgs"/> is passed, model will be quantized after training.
/// </param>
/// <param name="autotuneArgs">Autotune arguments.</param>
/// <param name="progressCallback">
/// Optional progress callback. When not <c>null</c>, it is used for this training call instead of the
/// callback configured on <paramref name="args"/>; <paramref name="args"/> itself is not modified.
/// </param>
/// <exception cref="InvalidOperationException">
/// <paramref name="autotuneArgs"/> specifies a model size, but <paramref name="args"/> is not a
/// <see cref="QuantizedSupervisedArgs"/> instance.
/// </exception>
/// <remarks>Trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
public void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, TrainProgressCallback progressCallback = null)
{
    // Previously progressCallback was accepted but never forwarded; pass it through to training.
    SupervisedCore(inputPath, outputPath, args, autotuneArgs, false, progressCallback);
}

/// <summary>
/// Shared implementation behind both <c>Supervised</c> overloads.
/// </summary>
/// <param name="inputPath">Path to a training set.</param>
/// <param name="outputPath">Path to write the model to (excluding extension).</param>
/// <param name="args">Training arguments, optionally a <see cref="QuantizedSupervisedArgs"/>.</param>
/// <param name="autotuneArgs">Autotune arguments.</param>
/// <param name="debug">Whether to write debug info.</param>
/// <param name="progressCallback">
/// Training progress callback that takes precedence over the one configured on <paramref name="args"/>;
/// <c>null</c> to use the one from <paramref name="args"/>.
/// </param>
private void SupervisedCore(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, bool debug, TrainProgressCallback progressCallback)
{
    ValidatePaths(inputPath, outputPath, args.PretrainedVectors);

    if (args.model != ModelName.Supervised)
    {
        _logger?.LogWarning($"{args.model} model type specified in a Supervised() call. Model type will be changed to Supervised.");
    }

    var quantizedArgs = args as QuantizedSupervisedArgs;
    bool hasModelSize = !string.IsNullOrEmpty(autotuneArgs.ModelSize);

    if (hasModelSize && quantizedArgs == null)
    {
        throw new InvalidOperationException("You specified model size in autotuneArgs, but passed SupervisedArgs instance. Pass QuantizedSupervisedArgs instead.");
    }

    // Quantized args without an autotune model size: train without an output path first,
    // then run a separate Quantize() step that receives the output path.
    bool quantizeWithNoQuantTune = quantizedArgs != null && !hasModelSize;

    var argsStruct = _mapper.Map<FastTextArgsStruct>(args);
    argsStruct.model = model_name.sup; // Always train as supervised, regardless of args.model.

    var autotuneStruct = _mapper.Map<AutotuneArgsStruct>(autotuneArgs);

    CheckForErrors(Train(
        _fastText,
        inputPath,
        quantizeWithNoQuantTune ? null : outputPath,
        argsStruct,
        autotuneStruct,
        progressCallback ?? args.TrainProgressCallback,
        autotuneArgs.AutotuneProgressCallback,
        args.LabelPrefix,
        args.PretrainedVectors,
        debug));

    if (quantizeWithNoQuantTune)
    {
        // NOTE(review): model path / max label length are presumably updated inside Quantize() — confirm.
        Quantize(quantizedArgs, outputPath);
    }
    else
    {
        _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));
        ModelPath = AdjustPath(outputPath, hasModelSize);
    }
}