/// <summary>
/// Trains a supervised model with default <c>SupervisedArgs</c> and <c>AutotuneArgs</c>, then checks
/// the model state, the files written next to the output path, and the arguments loaded back
/// from "_train.txt".
/// </summary>
public void CanTrainSupervised()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    const string trainFile = "cooking.train.txt";
    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    var trainArgs = new SupervisedArgs();
    var autotune = new AutotuneArgs();

    // NOTE(review): the trailing `true` presumably enables the debug dump that is read back
    // from "_train.txt" below — confirm against the Supervised() signature.
    wrapper.Supervised(trainFile, modelBase, trainArgs, autotune, true);

    // The trained model must be usable straight away.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binFile);
    AssertLabels(wrapper.GetLabels());

    // Both output files must exist on disk.
    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();

    // The recorded arguments must match what was passed in.
    var dump = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
    AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(autotune, dump.ExternalTune);
    AssertAutotuneArgs(autotune, dump.ConvertedTune);

    dump.ExternalInput.Should().Be(trainFile);
    dump.ConvertedInput.Should().Be(trainFile);
    dump.ExternalOutput.Should().Be(modelBase);
    dump.ConvertedOutput.Should().Be(modelBase);
}
/// <summary>
/// Trains an unsupervised CBOW model with a progress callback attached and checks that the
/// callback fired at least once and that the model and its output files are in place.
/// </summary>
public void CanTrainCbowWithProgressCallback()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    // Counts how many times the training progress callback is invoked.
    int callbackHits = 0;
    var trainArgs = new UnsupervisedArgs
    {
        TrainProgressCallback = (progress, loss, wst, lr, eta) => callbackHits++
    };

    wrapper.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", modelBase, trainArgs);

    callbackHits.Should().BeGreaterThan(0);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binFile);

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains a supervised model (wrapper created without a logger factory) with a progress callback
/// attached and checks that the callback fired at least once and that the model, its labels and
/// its output files are in place.
/// </summary>
public void CanTrainSupervisedWithProgressCallback()
{
    using var wrapper = new FastTextWrapper();

    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    // Counts how many times the training progress callback is invoked.
    int callbackHits = 0;
    var trainArgs = new SupervisedArgs
    {
        TrainProgressCallback = (progress, loss, wst, lr, eta) => callbackHits++
    };

    wrapper.Supervised("cooking.train.txt", modelBase, trainArgs);

    callbackHits.Should().BeGreaterThan(0);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binFile);
    AssertLabels(wrapper.GetLabels());

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains a supervised model with explicit, non-default training arguments and an autotune
/// configuration that points at "cooking.valid.txt", then checks the model state, the output
/// files and the arguments loaded back from "_train.txt".
/// </summary>
public void CanAutotuneSupervisedModel()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    const string trainFile = "cooking.train.txt";
    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    var trainArgs = new SupervisedArgs
    {
        bucket = 2100000,
        dim = 250,
        epoch = 10,
        loss = LossName.HierarchicalSoftmax,
        lr = 0.5,
        maxn = 5,
        minn = 2,
        neg = 6,
        seed = 42,
        t = 0.0002,
        thread = 10,
        verbose = 1,
        ws = 6,
        minCount = 2,
        saveOutput = true,
        wordNgrams = 2,
        lrUpdateRate = 110,
        minCountLabel = 1
    };

    var tuning = new AutotuneArgs
    {
        Duration = 30,
        Metric = "precisionAtRecall:30",
        Predictions = 2,
        ValidationFile = "cooking.valid.txt"
    };

    // NOTE(review): the trailing `true` presumably enables the debug dump that is read back
    // from "_train.txt" below — confirm against the Supervised() signature.
    wrapper.Supervised(trainFile, modelBase, trainArgs, tuning, true);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(250);
    wrapper.ModelPath.Should().Be(binFile);

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();

    // The recorded arguments must match what was passed in.
    var dump = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
    AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(tuning, dump.ExternalTune);
    AssertAutotuneArgs(tuning, dump.ConvertedTune);

    dump.ExternalInput.Should().Be(trainFile);
    dump.ConvertedInput.Should().Be(trainFile);
    dump.ExternalOutput.Should().Be(modelBase);
    dump.ConvertedOutput.Should().Be(modelBase);
}
/// <summary>
/// Loads the model file produced by the shared fixture into a fresh wrapper and checks that the
/// model is ready, has dimension 100 and exposes the expected labels.
/// </summary>
public void CanLoadSupervisedModel()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string fixtureModel = _fixture.FastText.ModelPath;
    wrapper.LoadModel(fixtureModel);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    AssertLabels(wrapper.GetLabels());
}
/// <summary>
/// Loads the fixture's model, quantizes it, and checks that the wrapper now points at a ".ftz"
/// file next to the original model and that the ".ftz" and ".vec" files exist.
/// </summary>
public void CanQuantizeLoadedSupervisedModel()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string fixtureModel = _fixture.FastText.ModelPath;
    wrapper.LoadModel(fixtureModel);

    // State before quantization.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    AssertLabels(wrapper.GetLabels());

    // Fixture model path with its extension stripped; expected outputs are derived from it.
    string modelDir = Path.GetDirectoryName(fixtureModel);
    string modelName = Path.GetFileNameWithoutExtension(fixtureModel);
    string quantizedBase = Path.Combine(modelDir, modelName);
    string ftzFile = quantizedBase + ".ftz";
    string vecFile = quantizedBase + ".vec";

    wrapper.Quantize();

    // State after quantization.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(ftzFile);

    File.Exists(ftzFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains an unsupervised CBOW model without passing explicit arguments and checks that the
/// model is ready, has dimension 100 and that the ".bin" and ".vec" files were written.
/// </summary>
public void CanTrainCbowModel()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    wrapper.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", modelBase);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binFile);

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains a supervised model using <c>FastTextArgs.SupervisedDefaults()</c> and checks that the
/// model is ready, has dimension 100, exposes the expected labels and that the ".bin" and ".vec"
/// files were written.
/// </summary>
public void CanTrainSupervised()
{
    // Dispose the wrapper when the test ends, including when an assertion throws.
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    fastText.Supervised("cooking.train.txt", outPath, FastTextArgs.SupervisedDefaults());

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    CheckLabels(fastText.GetLabels());

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
/// <summary>
/// Trains a supervised model with a wrapper created without a logger factory and without
/// explicit training arguments, then checks the model state, labels and output files.
/// </summary>
public void CanTrainSupervisedWithNoLoggingAndNoArgs()
{
    using var wrapper = new FastTextWrapper();

    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    wrapper.Supervised("cooking.train.txt", modelBase);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(binFile);
    AssertLabels(wrapper.GetLabels());

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains a supervised model from the default supervised arguments with
/// <c>PretrainedVectors</c> set to "cooking.unsup.300.vec" and <c>dim</c> set to 300, then
/// checks that the model is ready, has dimension 300, exposes the expected labels and that the
/// ".bin" and ".vec" files were written.
/// </summary>
public void CanUsePretrainedVectorsForSupervisedModel()
{
    // Dispose the wrapper when the test ends, including when an assertion throws.
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    var args = FastTextArgs.SupervisedDefaults();
    args.PretrainedVectors = "cooking.unsup.300.vec";
    args.dim = 300;

    fastText.Supervised("cooking.train.txt", outPath, args);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(300);
    CheckLabels(fastText.GetLabels());

    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
/// <summary>
/// Trains a supervised model with <c>PretrainedVectors</c> set to "cooking.unsup.300.vec" and
/// <c>dim</c> set to 300, passing a default <c>AutotuneArgs</c>, then checks the model state
/// (dimension 300), labels and output files.
/// </summary>
public void CanTrainSupervisedWithPretrainedVectors()
{
    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

    string modelBase = Path.Combine(_tempDir, "cooking");
    string binFile = modelBase + ".bin";
    string vecFile = modelBase + ".vec";

    var trainArgs = new SupervisedArgs
    {
        PretrainedVectors = "cooking.unsup.300.vec",
        dim = 300
    };

    // NOTE(review): meaning of the trailing `true` is not visible here — confirm against
    // the Supervised() signature.
    wrapper.Supervised("cooking.train.txt", modelBase, trainArgs, new AutotuneArgs(), true);

    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(300);
    wrapper.ModelPath.Should().Be(binFile);
    AssertLabels(wrapper.GetLabels());

    File.Exists(binFile).Should().BeTrue();
    File.Exists(vecFile).Should().BeTrue();
}
/// <summary>
/// Trains a supervised model using the relative output name "cooking" and checks that the model
/// is ready, has dimension 100, reports "cooking.bin" as its path, exposes the expected labels
/// and that "cooking.bin" and "cooking.vec" exist. Both files are deleted afterwards.
/// </summary>
public void CanTrainSupervisedWithRelativeOutput()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    try
    {
        // NOTE(review): meaning of the trailing `true` is not visible here — confirm against
        // the Supervised() signature.
        fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

        fastText.IsModelReady().Should().BeTrue();
        fastText.GetModelDimension().Should().Be(100);
        fastText.ModelPath.Should().Be("cooking.bin");
        AssertLabels(fastText.GetLabels());

        File.Exists("cooking.bin").Should().BeTrue();
        File.Exists("cooking.vec").Should().BeTrue();
    }
    finally
    {
        // The outputs are written via a relative path rather than under _tempDir, so remove
        // them even when an assertion above fails. File.Delete does not throw when the file
        // is absent.
        File.Delete("cooking.bin");
        File.Delete("cooking.vec");
    }
}