public void CanHandleUtf8()
        {
            // Purpose: train a supervised model on the Russian sample file and check that
            // Cyrillic labels, a prediction and nearest neighbours come back intact.

            // Same file is used for training and, further down, for rebuilding the vocabulary.
            const string trainingFile = "data.rus.txt";

            var    fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
            string outPath  = Path.Combine(_tempDir, "rus");

            fastText.Supervised(trainingFile, outPath, FastTextArgs.SupervisedDefaults());

            // The trained model is expected to expose exactly these two Cyrillic labels.
            var labels = fastText.GetLabels();

            labels.Length.Should().Be(2);
            labels.Should().Contain(new[] { "__label__оператор", "__label__выход" });

            var pred = fastText.PredictSingle("Позови оператора");

            pred.Probability.Should().BeGreaterThan(0);
            pred.Label.Should().Be("__label__оператор");

            // Distinct tokens of the training file minus the label tokens. The label prefix is a
            // machine-readable marker, so it is matched ordinally instead of with the current
            // culture's linguistic rules.
            var sourceWords = File.ReadAllText(trainingFile)
                              .Split(new[] { " ", "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
                              .Where(x => !x.StartsWith("__label__", StringComparison.Ordinal))
                              .Distinct().ToArray();
            var nn = fastText.GetNN("оператор", 2);

            // Every returned neighbour must be a word that actually occurs in the training file.
            nn.Length.Should().Be(2);
            sourceWords.Should().Contain(nn.Select(x => x.Label));
            foreach (var prediction in nn)
            {
                prediction.Probability.Should().BeGreaterThan(0);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Sample entry point: sets up Serilog console logging, creates a fresh temporary
        /// directory and trains a supervised model from "cooking.train.txt" into it.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Everything from Debug level upwards goes to an unthemed console sink.
            var consoleLogger = new LoggerConfiguration()
                                .MinimumLevel.Debug()
                                .WriteTo.Console(theme: ConsoleTheme.None)
                                .CreateLogger();
            Log.Logger = consoleLogger;

            var programLog = Log.ForContext<Program>();

            // A GUID-named folder under the system temp path keeps runs from colliding.
            string workDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
            Directory.CreateDirectory(workDir);
            programLog.Information($"Temp dir: {workDir}");

            string modelPath     = Path.Combine(workDir, "cooking.bin");
            var    loggerFactory = new LoggerFactory(new[] { new SerilogLoggerProvider() });
            var    fastText      = new FastTextWrapper(loggerFactory: loggerFactory);

            // Start from the supervised defaults and override the training hyper-parameters.
            var trainingArgs = FastTextArgs.SupervisedDefaults();
            trainingArgs.epoch      = 15;
            trainingArgs.lr         = 1;
            trainingArgs.dim        = 300;
            trainingArgs.wordNgrams = 2;
            trainingArgs.minn       = 3;
            trainingArgs.maxn       = 6;

            fastText.Supervised("cooking.train.txt", modelPath, trainingArgs);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Sample entry point: trains a supervised model on "cooking.train.txt", evaluates it on
        /// "cooking.valid.txt", logs the global precision/recall/F1 and plots precision-recall
        /// curves into a temporary directory that is removed once the user presses a key.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Shared between the cleanup step and TestResult.LoadDebugResult below.
            const string debugFile = "_debug.txt";

            Log.Logger = new LoggerConfiguration()
                         .MinimumLevel.Debug()
                         .WriteTo.Console(theme: ConsoleTheme.None)
                         .CreateLogger();

            var log     = Log.ForContext <Program>();
            var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));

            Directory.CreateDirectory(tempDir);

            log.Information($"Temp dir: {tempDir}");

            string outPath  = Path.Combine(tempDir, "cooking.bin");
            var    fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() }));

            // Start from the supervised defaults and override the training hyper-parameters.
            var ftArgs = FastTextArgs.SupervisedDefaults();

            ftArgs.epoch      = 15;
            ftArgs.lr         = 1;
            ftArgs.dim        = 300;
            ftArgs.wordNgrams = 2;
            ftArgs.minn       = 3;
            ftArgs.maxn       = 6;
            fastText.Supervised("cooking.train.txt", outPath, ftArgs);

            // Best-effort removal of a debug file left over from an earlier run. A failure is not
            // fatal, but it is logged instead of silently swallowed: a stale file could otherwise
            // be picked up by LoadDebugResult further down.
            // NOTE(review): presumably TestInternal regenerates this file - confirm.
            try
            {
                File.Delete(debugFile);
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                log.Warning(ex, "Could not delete stale debug file {DebugFile}", debugFile);
            }

            var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);

            log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                            $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                            $"\n\tF1: {result.GlobalMetrics.GetF1()}");

            var curve = result.GetPrecisionRecallCurve();

            // Only the curve part of the loaded debug result is needed for the plot.
            var (_, debugCurve) = TestResult.LoadDebugResult(debugFile, fastText.GetLabels());

            string plotPath = PlotCurves(tempDir, new [] { curve, debugCurve });

            log.Information($"Precision-Recall plot: {plotPath}");

            // Keep the temp directory (and the plot in it) alive until the user is done.
            Console.WriteLine("\nPress any key to exit.");
            Console.ReadKey();

            Directory.Delete(tempDir, true);
        }
        /// <summary>
        /// Trains through the <c>Train</c> method (the "old API" this test is named after) with the
        /// default supervised arguments, then checks the labels and the ".bin"/".vec" output files.
        /// </summary>
        public void CanTrainModelWithOldApi()
        {
            var    wrapper     = new FastTextWrapper(loggerFactory: _loggerFactory);
            string modelPrefix = Path.Combine(_tempDir, "cooking");

            wrapper.Train("cooking.train.txt", modelPrefix, FastTextArgs.SupervisedDefaults());

            CheckLabels(wrapper.GetLabels());

            // Both artifacts are expected next to the given prefix.
            foreach (string extension in new[] { ".bin", ".vec" })
            {
                File.Exists(modelPrefix + extension).Should().BeTrue();
            }
        }
        /// <summary>
        /// Checks a selection of the values returned by <c>FastTextArgs.SupervisedDefaults()</c>.
        /// </summary>
        public void CanGetDefaultSupervisedArgs()
        {
            var defaults = FastTextArgs.SupervisedDefaults();

            // Sizes.
            defaults.bucket.Should().Be(2000000);
            defaults.dim.Should().Be(100);

            // Model kind and loss.
            defaults.loss.Should().Be(LossName.Softmax);
            defaults.model.Should().Be(ModelName.Supervised);

            // Vocabulary threshold and character n-gram range.
            defaults.minCount.Should().Be(1);
            defaults.minn.Should().Be(0);
            defaults.maxn.Should().Be(0);

            // Learning rate is floating point, so compare with a tolerance (10e-5 == 1e-4).
            defaults.lr.Should().BeApproximately(0.1d, 10e-5);
        }
        /// <summary>
        /// Expects supervised training to throw a <c>NativeLibraryException</c> when the
        /// "cooking.unsup.300.vec" pretrained vectors are supplied without adjusting <c>dim</c>.
        /// </summary>
        public void CantUsePretrainedVectorsWithDifferentDimension()
        {
            var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelPrefix = Path.Combine(_tempDir, "cooking");

            // dim is deliberately left at its default, so it cannot match the pretrained vectors
            // (the expected message below reports 300 versus 100).
            var trainingArgs = FastTextArgs.SupervisedDefaults();
            trainingArgs.PretrainedVectors = "cooking.unsup.300.vec";

            var training = wrapper.Invoking(ft => ft.Supervised("cooking.train.txt", modelPrefix, trainingArgs));

            training.Should().Throw<NativeLibraryException>()
                    .WithMessage("Dimension of pretrained vectors (300) does not match dimension (100)!");
        }
        /// <summary>
        /// Trains a supervised model with a wrapper constructed without a logger factory and checks
        /// the model state, its dimension, the labels and the ".bin"/".vec" output files.
        /// </summary>
        public void CanTrainSupervisedWithNoLogging()
        {
            // No loggerFactory argument: this is the "no logging" configuration under test.
            var    wrapper     = new FastTextWrapper();
            string modelPrefix = Path.Combine(_tempDir, "cooking");

            wrapper.Supervised("cooking.train.txt", modelPrefix, FastTextArgs.SupervisedDefaults());

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            CheckLabels(wrapper.GetLabels());

            // Both artifacts are expected next to the given prefix.
            foreach (string extension in new[] { ".bin", ".vec" })
            {
                File.Exists(modelPrefix + extension).Should().BeTrue();
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates a unique temporary directory, trains a supervised model on "cooking.train.txt"
        /// into it and stores the path of the resulting ".bin" file in <c>ModelPath</c>.
        /// </summary>
        public SupervisedModelFixture()
        {
            // A GUID-named folder under the system temp path keeps fixture instances apart.
            string tempRoot = Path.GetTempPath();
            _tempDir = Path.Combine(tempRoot, Guid.NewGuid().ToString("N"));
            Directory.CreateDirectory(_tempDir);

            FastText = new FastTextWrapper();

            string modelPrefix = Path.Combine(_tempDir, "cooking");
            FastText.Supervised("cooking.train.txt", modelPrefix, FastTextArgs.SupervisedDefaults());
            FastText.IsModelReady().Should().BeTrue();

            // Fail construction early if training did not leave the expected files behind.
            ModelPath = $"{modelPrefix}.bin";
            File.Exists(ModelPath).Should().BeTrue();
            File.Exists($"{modelPrefix}.vec").Should().BeTrue();
        }
        /// <summary>
        /// Trains a supervised model seeded with the "cooking.unsup.300.vec" pretrained vectors and
        /// a matching <c>dim</c> of 300, then checks the model state, dimension, labels and output files.
        /// </summary>
        public void CanUsePretrainedVectorsForSupervisedModel()
        {
            var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelPrefix = Path.Combine(_tempDir, "cooking");

            // dim is set to 300 alongside the ".300.vec" pretrained vectors file.
            var trainingArgs = FastTextArgs.SupervisedDefaults();
            trainingArgs.PretrainedVectors = "cooking.unsup.300.vec";
            trainingArgs.dim               = 300;

            wrapper.Supervised("cooking.train.txt", modelPrefix, trainingArgs);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(300);
            CheckLabels(wrapper.GetLabels());

            // Both artifacts are expected next to the given prefix.
            foreach (string extension in new[] { ".bin", ".vec" })
            {
                File.Exists(modelPrefix + extension).Should().BeTrue();
            }
        }