// Trains a supervised model from "data.rus.txt" and verifies that non-ASCII (Cyrillic)
// text survives the round trip: label listing, a single prediction and a GetNN query.
public void CanHandleUtf8()
        {
            var    fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
            string outPath  = Path.Combine(_tempDir, "rus");

            fastText.Supervised("data.rus.txt", outPath, FastTextArgs.SupervisedDefaults());

            // The model is expected to expose exactly these two Cyrillic labels.
            var labels = fastText.GetLabels();

            labels.Length.Should().Be(2);
            labels.Should().Contain(new[] { "__label__оператор", "__label__выход" });

            // A Cyrillic query must come back with the expected label and a positive probability.
            var pred = fastText.PredictSingle("Позови оператора");

            pred.Probability.Should().BeGreaterThan(0);
            pred.Label.Should().Be("__label__оператор");

            // Collect the distinct non-label tokens of the input file. "__label__" is a fixed
            // marker, not linguistic text, so it is matched ordinally instead of with the
            // current culture; this keeps the filter identical on every machine.
            var sourceWords = File.ReadAllText("data.rus.txt")
                              .Split(new[] { " ", "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
                              .Where(x => !x.StartsWith("__label__", StringComparison.Ordinal))
                              .Distinct().ToArray();

            // Every entry returned by GetNN must be one of the tokens found in the input file.
            var nn = fastText.GetNN("оператор", 2);

            nn.Length.Should().Be(2);
            sourceWords.Should().Contain(nn.Select(x => x.Label));
            foreach (var prediction in nn)
            {
                prediction.Probability.Should().BeGreaterThan(0);
            }
        }
Ejemplo n.º 2
0
 // Issues a GetNN query for "train" requesting 5 entries; the result is intentionally ignored.
 private static void TestNN(FastTextWrapper fastText)
     => fastText.GetNN("train", 5);