/// <summary>
/// Trains a supervised model with default <c>QuantizedSupervisedArgs</c> and default
/// <c>AutotuneArgs</c>, then checks the wrapper state, the files produced on disk and
/// the arguments recorded in the <c>_train.txt</c> debug dump.
/// </summary>
public void CanTrainSupervisedAndQuantize()
{
    const string trainingFile = "cooking.train.txt";

    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);
    string modelBasePath = Path.Combine(_tempDir, "cooking");
    string quantizedModelPath = modelBasePath + ".ftz";

    var trainArgs = new QuantizedSupervisedArgs();
    var autotune = new AutotuneArgs();

    // NOTE(review): the trailing `true` presumably switches on the debug dump that is
    // read back below via DebugArgs.Load — confirm against the Supervised signature.
    wrapper.Supervised(trainingFile, modelBasePath, trainArgs, autotune, true);

    // Wrapper state after training.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(100);
    wrapper.ModelPath.Should().Be(quantizedModelPath);
    AssertLabels(wrapper.GetLabels());

    // Only the quantized artifact is expected on disk.
    File.Exists(quantizedModelPath).Should().BeTrue();
    File.Exists(modelBasePath + ".bin").Should().BeFalse();
    File.Exists(modelBasePath + ".vec").Should().BeFalse();

    // Arguments recorded in the debug dump must match what was passed in.
    var dump = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
    AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(autotune, dump.ExternalTune);
    AssertAutotuneArgs(autotune, dump.ConvertedTune);

    dump.ExternalInput.Should().Be(trainingFile);
    dump.ConvertedInput.Should().Be(trainingFile);
}
/// <summary>
/// Quantize a loaded model.
/// </summary>
/// <param name="args">Quantization args. Must not be <c>null</c>.</param>
/// <param name="output">Custom output path. Required if model was loaded from memory.</param>
/// <exception cref="InvalidOperationException">
/// The model is not loaded or trained; or no <paramref name="output"/> was given while
/// <c>ModelPath</c> is empty; or <paramref name="output"/> is a rooted path whose target
/// directory does not exist.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="args"/> is <c>null</c>.</exception>
public void Quantize(QuantizedSupervisedArgs args, string output = null)
{
    if (!IsModelReady())
    {
        throw new InvalidOperationException("Model is not loaded or trained!");
    }

    bool hasCustomOutput = !string.IsNullOrEmpty(output);

    if (string.IsNullOrEmpty(ModelPath) && !hasCustomOutput)
    {
        throw new InvalidOperationException("Model was loaded from memory. You need to specify output path.");
    }

    // Validated after the state checks so that their precedence is unchanged for
    // existing callers; without this guard a null argument surfaced later as a
    // NullReferenceException on args.LabelPrefix.
    if (args == null)
    {
        throw new ArgumentNullException(nameof(args));
    }

    var argsStruct = _mapper.Map<FastTextArgsStruct>(args);

    // Fall back to the current model path when no explicit output was supplied.
    string outPath = AdjustPath(hasCustomOutput ? output : ModelPath, true);

    // The directory check applies only to rooted paths supplied explicitly by the caller.
    if (Path.IsPathRooted(output) && !Directory.Exists(Path.GetDirectoryName(outPath)))
    {
        throw new InvalidOperationException("Output directory doesn't exist!");
    }

    CheckForErrors(Quantize(_fastText, outPath, argsStruct, args.LabelPrefix));

    // Refresh the cached maximum label length and point the wrapper at the new file.
    _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));
    ModelPath = outPath;
}
/// <summary>
/// Asserts that two <c>QuantizedSupervisedArgs</c> instances agree: first through
/// <c>AssertSupervisedArgs</c>, then on the fields compared here
/// (<c>qout</c>, <c>retrain</c>, <c>qnorm</c>, <c>cutoff</c>, <c>dsub</c>).
/// </summary>
/// <param name="expected">Arguments that were supplied.</param>
/// <param name="actual">Arguments that were read back.</param>
private void AssertQuantizedArgs(QuantizedSupervisedArgs expected, QuantizedSupervisedArgs actual)
{
    // Shared supervised checks run first.
    AssertSupervisedArgs(expected, actual);

    // Fields compared only for quantized args.
    actual.qout
        .Should().Be(expected.qout);
    actual.retrain
        .Should().Be(expected.retrain);
    actual.qnorm
        .Should().Be(expected.qnorm);
    actual.cutoff
        .Should().Be(expected.cutoff);
    actual.dsub
        .Should().Be(expected.dsub);
}
/// <summary>
/// Checks that a freshly constructed <c>QuantizedSupervisedArgs</c> has <c>dsub</c> equal to 2.
/// </summary>
public void CanGetDefaultQuantizeArgs()
{
    var defaults = new QuantizedSupervisedArgs();

    defaults.dsub
        .Should().Be(2);
}
/// <summary>
/// Trains a supervised model with explicit quantization and autotune settings, then
/// checks the wrapper state, the files produced on disk and the arguments, input and
/// output paths recorded in the <c>_train.txt</c> debug dump.
/// </summary>
public void CanAutotuneQuantizedSupervisedModel()
{
    const string trainingFile = "cooking.train.txt";

    using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);
    string modelBasePath = Path.Combine(_tempDir, "cooking");
    string quantizedModelPath = modelBasePath + ".ftz";

    var trainArgs = new QuantizedSupervisedArgs
    {
        bucket = 2100000,
        dim = 250,
        epoch = 10,
        loss = LossName.HierarchicalSoftmax,
        lr = 0.5,
        maxn = 5,
        minn = 2,
        neg = 6,
        seed = 42,
        t = 0.0002,
        thread = 10,
        verbose = 1,
        ws = 6,
        minCount = 2,
        saveOutput = true,
        wordNgrams = 2,
        lrUpdateRate = 110,
        minCountLabel = 1,
        cutoff = 10000,
        dsub = 3,
        retrain = true
    };

    var tuning = new AutotuneArgs
    {
        Duration = 60,
        Metric = "precisionAtRecall:30",
        Predictions = 2,
        ModelSize = "10M",
        ValidationFile = "cooking.valid.txt"
    };

    // NOTE(review): the trailing `true` presumably switches on the debug dump that is
    // read back below via DebugArgs.Load — confirm against the Supervised signature.
    wrapper.Supervised(trainingFile, modelBasePath, trainArgs, tuning, true);

    // Wrapper state after training.
    wrapper.IsModelReady().Should().BeTrue();
    wrapper.GetModelDimension().Should().Be(250);
    wrapper.ModelPath.Should().Be(quantizedModelPath);

    // Both the quantized model and the .vec file are expected on disk.
    File.Exists(quantizedModelPath).Should().BeTrue();
    File.Exists(modelBasePath + ".vec").Should().BeTrue();

    // Arguments and paths recorded in the debug dump must match what was passed in.
    var dump = DebugArgs.Load("_train.txt");

    AssertQuantizedArgs(trainArgs, dump.ExternalArgs);
    AssertQuantizedArgs(trainArgs, dump.ConvertedArgs);
    AssertAutotuneArgs(tuning, dump.ExternalTune);
    AssertAutotuneArgs(tuning, dump.ConvertedTune);

    dump.ExternalInput.Should().Be(trainingFile);
    dump.ConvertedInput.Should().Be(trainingFile);
    dump.ExternalOutput.Should().Be(modelBasePath);
    dump.ConvertedOutput.Should().Be(modelBasePath);
}