示例#1
0
        public void CanTrainSupervisedAndQuantize()
        {
            // Trains with default quantized-supervised and autotune arguments, then checks
            // the wrapper state, the files left on disk and the recorded debug arguments.
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string ftzPath = modelBase + ".ftz";

            var trainArgs = new QuantizedSupervisedArgs();
            var autotuneArgs = new AutotuneArgs();

            // NOTE(review): the trailing `true` presumably enables the debug dump read below — confirm.
            fastText.Supervised("cooking.train.txt", modelBase, trainArgs, autotuneArgs, true);

            // Wrapper state after training.
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(100);
            fastText.ModelPath.Should().Be(ftzPath);

            AssertLabels(fastText.GetLabels());

            // Only the .ftz file is expected on disk; no .bin or .vec.
            File.Exists(ftzPath).Should().BeTrue();
            File.Exists(modelBase + ".bin").Should().BeFalse();
            File.Exists(modelBase + ".vec").Should().BeFalse();

            var dump = DebugArgs.Load("_train.txt");

            // Both the external and the converted argument sets must match what was passed in.
            AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
            AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(autotuneArgs, dump.ExternalTune);
            AssertAutotuneArgs(autotuneArgs, dump.ConvertedTune);

            dump.ExternalInput.Should().Be("cooking.train.txt");
            dump.ConvertedInput.Should().Be("cooking.train.txt");
        }
示例#2
0
    /// <summary>
    /// Quantize a loaded model.
    /// </summary>
    /// <param name="args">Quantization args. Must not be <c>null</c>.</param>
    /// <param name="output">Custom output path. Required if model was loaded from memory.</param>
    /// <exception cref="ArgumentNullException"><paramref name="args"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The model is not ready, neither <see cref="ModelPath"/> nor <paramref name="output"/> is set,
    /// or a rooted <paramref name="output"/> resolves to a directory that doesn't exist.
    /// </exception>
    public void Quantize(QuantizedSupervisedArgs args, string output = null)
    {
        // args is dereferenced below (args.LabelPrefix); fail with a descriptive exception
        // instead of a NullReferenceException.
        if (args == null)
        {
            throw new ArgumentNullException(nameof(args));
        }

        if (!IsModelReady())
        {
            throw new InvalidOperationException("Model is not loaded or trained!");
        }

        if (string.IsNullOrEmpty(ModelPath) && string.IsNullOrEmpty(output))
        {
            throw new InvalidOperationException("Model was loaded from memory. You need to specify output path.");
        }

        var argsStruct = _mapper.Map<FastTextArgsStruct>(args);

        // An explicit output path wins; otherwise fall back to the current model path.
        string outPath = AdjustPath(string.IsNullOrEmpty(output) ? ModelPath : output, true);

        // The directory is only validated when the caller supplied a rooted output path.
        if (Path.IsPathRooted(output) && !Directory.Exists(Path.GetDirectoryName(outPath)))
        {
            throw new InvalidOperationException("Output directory doesn't exist!");
        }

        CheckForErrors(Quantize(_fastText, outPath, argsStruct, args.LabelPrefix));

        // Refresh the cached maximum label length after quantization.
        _maxLabelLen = CheckForErrors(GetMaxLabelLength(_fastText));

        ModelPath = outPath;
    }
示例#3
0
        /// <summary>
        /// Asserts that <paramref name="actual"/> matches <paramref name="expected"/>: first through
        /// <c>AssertSupervisedArgs</c>, then on the qout, retrain, qnorm, cutoff and dsub members.
        /// </summary>
        /// <param name="expected">Arguments holding the expected values.</param>
        /// <param name="actual">Arguments under test.</param>
        private void AssertQuantizedArgs(QuantizedSupervisedArgs expected, QuantizedSupervisedArgs actual)
        {
            // Shared supervised checks run first.
            AssertSupervisedArgs(expected, actual);

            // Quantization members, compared one at a time.
            actual.qout.Should()
                  .Be(expected.qout);
            actual.retrain.Should()
                  .Be(expected.retrain);
            actual.qnorm.Should()
                  .Be(expected.qnorm);
            actual.cutoff.Should()
                  .Be(expected.cutoff);
            actual.dsub.Should()
                  .Be(expected.dsub);
        }
示例#4
0
        public void CanGetDefaultQuantizeArgs()
        {
            // A freshly constructed argument object is expected to report dsub == 2.
            new QuantizedSupervisedArgs().dsub.Should().Be(2);
        }
示例#5
0
        public void CanAutotuneQuantizedSupervisedModel()
        {
            // Trains with fully customised quantized-supervised and autotune arguments, then
            // checks the wrapper state, the produced files and the recorded debug arguments.
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string ftzPath = modelBase + ".ftz";

            var trainArgs = new QuantizedSupervisedArgs
            {
                // Supervised training settings.
                bucket = 2100000,
                dim = 250,
                epoch = 10,
                loss = LossName.HierarchicalSoftmax,
                lr = 0.5,
                maxn = 5,
                minn = 2,
                neg = 6,
                seed = 42,
                t = 0.0002,
                thread = 10,
                verbose = 1,
                ws = 6,
                minCount = 2,
                saveOutput = true,
                wordNgrams = 2,
                lrUpdateRate = 110,
                minCountLabel = 1,

                // Quantization settings.
                cutoff = 10000,
                dsub = 3,
                retrain = true
            };

            var tuning = new AutotuneArgs
            {
                Duration = 60,
                Metric = "precisionAtRecall:30",
                Predictions = 2,
                ModelSize = "10M",
                ValidationFile = "cooking.valid.txt"
            };

            // NOTE(review): the trailing `true` presumably enables the debug dump read below — confirm.
            fastText.Supervised("cooking.train.txt", modelBase, trainArgs, tuning, true);

            // Wrapper state after training.
            fastText.IsModelReady().Should().BeTrue();
            fastText.GetModelDimension().Should().Be(250);
            fastText.ModelPath.Should().Be(ftzPath);

            // Both the .ftz and the .vec files are expected on disk.
            File.Exists(ftzPath).Should().BeTrue();
            File.Exists(modelBase + ".vec").Should().BeTrue();

            var dump = DebugArgs.Load("_train.txt");

            // Both the external and the converted argument sets must match what was passed in.
            AssertQuantizedArgs(trainArgs, dump.ExternalArgs);
            AssertQuantizedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(tuning, dump.ExternalTune);
            AssertAutotuneArgs(tuning, dump.ConvertedTune);

            dump.ExternalInput.Should().Be("cooking.train.txt");
            dump.ConvertedInput.Should().Be("cooking.train.txt");
            dump.ExternalOutput.Should().Be(modelBase);
            dump.ConvertedOutput.Should().Be(modelBase);
        }