public void CanTrainSupervised()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    fastText.Supervised("cooking.train.txt", outPath, args, tuneArgs, true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");

    AssertLabels(fastText.GetLabels());

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();

    var debugArgs = DebugArgs.Load("_train.txt");
    AssertSupervisedArgs(args, debugArgs.ExternalArgs);
    AssertSupervisedArgs(args, debugArgs.ConvertedArgs);
    AssertAutotuneArgs(tuneArgs, debugArgs.ExternalTune);
    AssertAutotuneArgs(tuneArgs, debugArgs.ConvertedTune);

    debugArgs.ExternalInput.Should().Be("cooking.train.txt");
    debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
    debugArgs.ExternalOutput.Should().Be(outPath);
    debugArgs.ConvertedOutput.Should().Be(outPath);
}
public void CanTrainSupervisedWithProgressCallback()
{
    using var fastText = new FastTextWrapper();
    string outPath = Path.Combine(_tempDir, "cooking");
    int callNum = 0;
    var args = new SupervisedArgs
    {
        TrainProgressCallback = (progress, loss, wst, lr, eta) => { callNum++; }
    };

    fastText.Supervised("cooking.train.txt", outPath, args);

    callNum.Should().BeGreaterThan(0);
    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");

    AssertLabels(fastText.GetLabels());

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
public void CanAutotuneSupervisedModel()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");
    var args = new SupervisedArgs
    {
        bucket = 2100000,
        dim = 250,
        epoch = 10,
        loss = LossName.HierarchicalSoftmax,
        lr = 0.5,
        maxn = 5,
        minn = 2,
        neg = 6,
        seed = 42,
        t = 0.0002,
        thread = 10,
        verbose = 1,
        ws = 6,
        minCount = 2,
        saveOutput = true,
        wordNgrams = 2,
        lrUpdateRate = 110,
        minCountLabel = 1
    };
    var autotuneArgs = new AutotuneArgs
    {
        Duration = 30,
        Metric = "precisionAtRecall:30",
        Predictions = 2,
        ValidationFile = "cooking.valid.txt"
    };

    fastText.Supervised("cooking.train.txt", outPath, args, autotuneArgs, true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(250);
    fastText.ModelPath.Should().Be(outPath + ".bin");

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();

    var debugArgs = DebugArgs.Load("_train.txt");
    AssertSupervisedArgs(args, debugArgs.ExternalArgs);
    AssertSupervisedArgs(args, debugArgs.ConvertedArgs);
    AssertAutotuneArgs(autotuneArgs, debugArgs.ExternalTune);
    AssertAutotuneArgs(autotuneArgs, debugArgs.ConvertedTune);

    debugArgs.ExternalInput.Should().Be("cooking.train.txt");
    debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
    debugArgs.ExternalOutput.Should().Be(outPath);
    debugArgs.ConvertedOutput.Should().Be(outPath);
}
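// A minimal usage sketch (not part of the test suite) of a size-constrained autotune run.
// As the Supervised() implementation below enforces, setting AutotuneArgs.ModelSize requires
// passing a QuantizedSupervisedArgs instance. The "2M" size string and 60-second duration
// here are illustrative assumptions, not values taken from these tests.
using var fastText = new FastTextWrapper();
var quantArgs = new QuantizedSupervisedArgs();
var tune = new AutotuneArgs
{
    ValidationFile = "cooking.valid.txt", // presence of a validation file enables autotuning
    Duration = 60,                        // seconds to spend on hyperparameter search
    ModelSize = "2M"                      // target size of the quantized model
};
fastText.Supervised("cooking.train.txt", "cooking", quantArgs, tune);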
static void Main(string[] args)
{
    Log.Logger = new LoggerConfiguration()
        .MinimumLevel.Debug()
        .WriteTo.Console(theme: ConsoleTheme.None)
        .CreateLogger();

    var log = Log.ForContext<Program>();
    var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
    Directory.CreateDirectory(tempDir);
    log.Information($"Temp dir: {tempDir}");

    string outPath = Path.Combine(tempDir, "cooking.bin");
    var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() }));

    var ftArgs = new SupervisedArgs();
    ftArgs.epoch = 15;
    ftArgs.lr = 1;
    ftArgs.dim = 300;
    ftArgs.wordNgrams = 2;
    ftArgs.minn = 3;
    ftArgs.maxn = 6;

    fastText.Supervised("cooking.train.txt", outPath, ftArgs);

    try
    {
        File.Delete("_debug.txt");
    }
    catch
    {
    }

    var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);
    log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                    $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                    $"\n\tF1: {result.GlobalMetrics.GetF1()}");

    var curve = result.GetPrecisionRecallCurve();
    var (_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());
    string plotPath = PlotCurves(tempDir, new[] { curve, debugCurve });
    log.Information($"Precision-Recall plot: {plotPath}");

    Console.WriteLine("\nPress any key to exit.");
    Console.ReadKey();
    Directory.Delete(tempDir, true);
}
private static void TrainSupervised(FastTextWrapper fastText, string trainFile, string modelFile)
{
    fastText.Train(trainFile, modelFile, SupervisedArgs.SupervisedDefaults(x =>
    {
        x.Epochs = 25;
        x.LearningRate = 1.0;
        x.WordNGrams = 3;
        x.Verbose = 2;
        x.LabelPrefix = "__label__";
    }));
}
public void CanGetDefaultSupervisedArgs()
{
    var args = new SupervisedArgs();

    args.bucket.Should().Be(2000000);
    args.dim.Should().Be(100);
    args.loss.Should().Be(LossName.Softmax);
    args.model.Should().Be(ModelName.Supervised);
    args.minCount.Should().Be(1);
    args.minn.Should().Be(0);
    args.maxn.Should().Be(0);
    args.lr.Should().BeApproximately(0.1d, 10e-5);
}
public void CantTrainSupervisedWithPretrainedVectorsWithDifferentDimension()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");
    var args = new SupervisedArgs();
    args.PretrainedVectors = "cooking.unsup.300.vec";

    fastText.Invoking(x => x.Supervised("cooking.train.txt", outPath, args))
        .Should().Throw<NativeLibraryException>()
        .WithMessage("Dimension of pretrained vectors (300) does not match dimension (100)!");
}
/// <summary>
/// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
/// hyperparameter search will be performed.
/// </summary>
/// <param name="inputPath">Path to a training set.</param>
/// <param name="outputPath">Path to write the model to (excluding extension).</param>
/// <param name="args">
/// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
/// If <see cref="QuantizedSupervisedArgs"/> is passed, the model will be quantized after training.
/// </param>
/// <param name="autotuneArgs">Autotune arguments.</param>
/// <param name="debug">Whether to write debug info.</param>
/// <remarks>The trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
internal void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, bool debug)
{
    ValidatePaths(inputPath, outputPath, args.PretrainedVectors);

    if (args.model != ModelName.Supervised)
    {
        _logger?.LogWarning($"{args.model} model type specified in a Supervised() call. Model type will be changed to Supervised.");
    }

    var quantizedArgs = args as QuantizedSupervisedArgs;
    if (!string.IsNullOrEmpty(autotuneArgs.ModelSize) && quantizedArgs == null)
    {
        throw new InvalidOperationException("You specified model size in autotuneArgs, but passed a SupervisedArgs instance. Pass QuantizedSupervisedArgs instead.");
    }

    // Quantize in a separate step only when no autotune size constraint is given;
    // otherwise autotune itself produces the size-constrained model.
    bool quantizeWithNoQuantTune = quantizedArgs != null && string.IsNullOrEmpty(autotuneArgs.ModelSize);

    var argsStruct = _mapper.Map<FastTextArgsStruct>(args);
    argsStruct.model = model_name.sup;

    var autotuneStruct = _mapper.Map<AutotuneArgsStruct>(autotuneArgs);

    CheckForErrors(Train(
        _fastText,
        inputPath,
        quantizeWithNoQuantTune ? null : outputPath,
        argsStruct,
        autotuneStruct,
        args.TrainProgressCallback,
        autotuneArgs.AutotuneProgressCallback,
        args.LabelPrefix,
        args.PretrainedVectors,
        debug));

    if (quantizeWithNoQuantTune)
    {
        Quantize(quantizedArgs, outputPath);
    }
    else
    {
        _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));
        ModelPath = AdjustPath(outputPath, !string.IsNullOrEmpty(autotuneArgs.ModelSize));
    }
}
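// A hedged usage sketch of the quantize-without-autotune path above: passing QuantizedSupervisedArgs
// with an empty AutotuneArgs.ModelSize trains a supervised model and then quantizes it via Quantize().
// File names mirror the tests in this excerpt; QuantizedSupervisedArgs defaults are assumed.
using var fastText = new FastTextWrapper();
var quantArgs = new QuantizedSupervisedArgs();
fastText.Supervised("cooking.train.txt", "cooking", quantArgs, new AutotuneArgs());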
public void CanTrainSupervisedWithPretrainedVectors()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");
    var args = new SupervisedArgs();
    args.PretrainedVectors = "cooking.unsup.300.vec";
    args.dim = 300;

    fastText.Supervised("cooking.train.txt", outPath, args, new AutotuneArgs(), true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(300);
    fastText.ModelPath.Should().Be(outPath + ".bin");

    AssertLabels(fastText.GetLabels());

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
public void CanTrainSupervisedWithRelativeOutput()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be("cooking.bin");

    AssertLabels(fastText.GetLabels());

    File.Exists("cooking.bin").Should().BeTrue();
    File.Exists("cooking.vec").Should().BeTrue();

    File.Delete("cooking.bin");
    File.Delete("cooking.vec");
}
private void AssertSupervisedArgs(SupervisedArgs expected, SupervisedArgs actual)
{
    actual.lr.Should().Be(expected.lr);
    actual.lrUpdateRate.Should().Be(expected.lrUpdateRate);
    actual.dim.Should().Be(expected.dim);
    actual.ws.Should().Be(expected.ws);
    actual.epoch.Should().Be(expected.epoch);
    actual.minCount.Should().Be(expected.minCount);
    actual.minCountLabel.Should().Be(expected.minCountLabel);
    actual.neg.Should().Be(expected.neg);
    actual.wordNgrams.Should().Be(expected.wordNgrams);
    actual.loss.Should().Be(expected.loss);
    actual.model.Should().Be(expected.model);
    actual.bucket.Should().Be(expected.bucket);
    actual.minn.Should().Be(expected.minn);
    actual.maxn.Should().Be(expected.maxn);
    actual.thread.Should().Be(expected.thread);
    actual.t.Should().Be(expected.t);
    (actual.LabelPrefix ?? "").Should().Be(expected.LabelPrefix ?? "");
    actual.verbose.Should().Be(expected.verbose);
    (actual.PretrainedVectors ?? "").Should().Be(expected.PretrainedVectors ?? "");
    actual.saveOutput.Should().Be(expected.saveOutput);
    actual.seed.Should().Be(expected.seed);
}
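// A sketch of the companion AssertAutotuneArgs helper, which the tests above call but this excerpt
// does not show. It is assumed to compare the AutotuneArgs fields those tests set (Duration, Metric,
// Predictions, ValidationFile, ModelSize); treat it as illustrative rather than the actual helper.
private void AssertAutotuneArgs(AutotuneArgs expected, AutotuneArgs actual)
{
    actual.Duration.Should().Be(expected.Duration);
    (actual.Metric ?? "").Should().Be(expected.Metric ?? "");
    actual.Predictions.Should().Be(expected.Predictions);
    (actual.ValidationFile ?? "").Should().Be(expected.ValidationFile ?? "");
    (actual.ModelSize ?? "").Should().Be(expected.ModelSize ?? "");
}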
private static void TrainSupervised()
{
    using (var fastText = new FastTextWrapper())
    {
        fastText.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking",
            SupervisedArgs.SupervisedDefaults(x =>
            {
                x.Epochs = 25;
                x.LearningRate = 1.0;
                x.WordNGrams = 3;
                x.Verbose = 2;
                x.LabelPrefix = "__label__";
            }));
    }
}
/// <summary>
/// Trains a new supervised model. If <see cref="AutotuneArgs.ValidationFile"/> is specified, an automated
/// hyperparameter search will be performed.
/// </summary>
/// <param name="inputPath">Path to a training set.</param>
/// <param name="outputPath">Path to write the model to (excluding extension).</param>
/// <param name="args">
/// Training arguments. If <see cref="SupervisedArgs"/> is passed, a supervised model will be trained.
/// If <see cref="QuantizedSupervisedArgs"/> is passed, the model will be quantized after training.
/// </param>
/// <param name="autotuneArgs">Autotune arguments.</param>
/// <param name="progressCallback">Optional progress callback.</param>
/// <remarks>The trained model will consist of two files: .bin (main model) and .vec (word vectors).</remarks>
public void Supervised(string inputPath, string outputPath, SupervisedArgs args, AutotuneArgs autotuneArgs, TrainProgressCallback progressCallback = null)
{
    // Forward the optional callback through the args object, which is where the
    // internal Supervised() overload reads it from.
    if (progressCallback != null)
    {
        args.TrainProgressCallback = progressCallback;
    }

    Supervised(inputPath, outputPath, args, autotuneArgs, false);
}
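// A minimal example (assuming the cooking.* files from these tests are present) of calling the
// public overload above with an inline progress callback. The progress argument is a fraction
// in [0, 1], as the Spectre.Console sample below also assumes.
using var fastText = new FastTextWrapper();
var args = new SupervisedArgs { epoch = 5, verbose = 0 };

fastText.Supervised(
    "cooking.train.txt",
    "cooking",
    args,
    new AutotuneArgs(),
    (progress, loss, wst, lr, eta) => Console.WriteLine($"{progress:P0} done, loss {loss:N3}, ETA {eta}"));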
static void Main(string[] args)
{
    Log.Logger = new LoggerConfiguration()
        .MinimumLevel.Debug()
        .WriteTo.Console(theme: ConsoleTheme.None)
        .CreateLogger();

    var log = Log.ForContext<Program>();
    var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
    Directory.CreateDirectory(tempDir);
    log.Information($"Temp dir: {tempDir}");

    string outPath = Path.Combine(tempDir, "cooking.bin");
    var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() }));

    AnsiConsole.Progress()
        .Start(ctx =>
        {
            var task = ctx.AddTask("Training");
            var ftArgs = new SupervisedArgs
            {
                epoch = 15,
                lr = 1,
                dim = 300,
                wordNgrams = 2,
                minn = 3,
                maxn = 6,
                verbose = 0,
                TrainProgressCallback = (progress, loss, wst, lr, eta) =>
                {
                    task.Value = Math.Ceiling(progress * 100);
                    task.Description = $"Loss: {loss:N3}, words/thread/sec: {wst}, LR: {lr:N5}, ETA: {eta}";
                }
            };

            fastText.Supervised("cooking.train.txt", outPath, ftArgs);
        });

    try
    {
        File.Delete("_debug.txt");
    }
    catch
    {
    }

    log.Information("Validating model on the test set");
    var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);
    log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                    $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                    $"\n\tF1: {result.GlobalMetrics.GetF1()}");

    var curve = result.GetPrecisionRecallCurve();
    var (_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());
    string plotPath = PlotCurves(tempDir, new[] { curve, debugCurve });
    log.Information($"Precision-Recall plot: {plotPath}");

    Console.WriteLine("\nPress any key to exit.");
    Console.ReadKey();
    Directory.Delete(tempDir, true);
}
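// A hedged sketch of the PlotCurves helper used above, which this excerpt does not include.
// Assumptions (not from the source): each curve is an array of (precision, recall) points,
// and OxyPlot.SkiaSharp is available for PNG rendering (requires using OxyPlot;
// using OxyPlot.Series; using OxyPlot.SkiaSharp;). The real sample may plot differently.
private static string PlotCurves(string dir, (double precision, double recall)[][] curves)
{
    var model = new PlotModel { Title = "Precision-Recall" };

    foreach (var curve in curves)
    {
        var series = new LineSeries();
        foreach (var (precision, recall) in curve)
        {
            series.Points.Add(new DataPoint(recall, precision)); // recall on X, precision on Y
        }
        model.Series.Add(series);
    }

    string path = Path.Combine(dir, "precision-recall.png");
    using var stream = File.Create(path);
    new PngExporter { Width = 800, Height = 600 }.Export(model, stream);
    return path;
}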