Ejemplo n.º 1
0
 /// <summary>
 /// Exercises the read-only API of <paramref name="fastText"/> with one fixed sample question:
 /// label listing, single and multiple prediction, and sentence-vector extraction.
 /// The results are not inspected; the calls only have to complete.
 /// </summary>
 /// <param name="fastText">Wrapper to query.</param>
 private static void Test(FastTextWrapper fastText)
 {
     const string question = "Can I use a larger crockpot than the recipe calls for?";

     var knownLabels    = fastText.GetLabels();
     var bestMatch      = fastText.PredictSingle(question);
     var topMatches     = fastText.PredictMultiple(question, 4);
     var sentenceVector = fastText.GetSentenceVector(question);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Configures console logging, creates a fresh temporary directory and trains a
        /// supervised model from "cooking.train.txt" into that directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                         .MinimumLevel.Debug()
                         .WriteTo.Console(theme: ConsoleTheme.None)
                         .CreateLogger();

            var log     = Log.ForContext <Program>();
            var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));

            Directory.CreateDirectory(tempDir);

            log.Information($"Temp dir: {tempDir}");

            string outPath = Path.Combine(tempDir, "cooking.bin");

            // The wrapper is disposable (other call sites wrap it in `using`), so release it
            // deterministically instead of leaving it to process teardown.
            using (var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() })))
            {
                var ftArgs = FastTextArgs.SupervisedDefaults();

                ftArgs.epoch      = 15;
                ftArgs.lr         = 1;
                ftArgs.dim        = 300;
                ftArgs.wordNgrams = 2;
                ftArgs.minn       = 3;
                ftArgs.maxn       = 6;

                fastText.Supervised("cooking.train.txt", outPath, ftArgs);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a CBOW model with a progress callback attached and checks that the callback
        /// was invoked at least once and that the model and its output files are in place.
        /// </summary>
        public void CanTrainCbowWithProgressCallback()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase    = Path.Combine(_tempDir, "cooking");
            string binFile      = modelBase + ".bin";
            string vecFile      = modelBase + ".vec";
            int    callbackHits = 0;

            var trainArgs = new UnsupervisedArgs
            {
                // Only the number of invocations matters; the reported values are ignored.
                TrainProgressCallback = (progress, loss, wst, lr, eta) => callbackHits++
            };

            wrapper.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", modelBase, trainArgs);

            callbackHits.Should().BeGreaterThan(0);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            wrapper.ModelPath.Should().Be(binFile);

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains a supervised model on Cyrillic data and verifies that labels, predictions and
        /// nearest neighbours come back as the expected non-ASCII text.
        /// </summary>
        public void CanHandleUtf8()
        {
            using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
            string outPath = Path.Combine(_tempDir, "rus");

            fastText.Supervised("data.rus.txt", outPath, new SupervisedArgs());

            var labels = fastText.GetLabels();

            labels.Length.Should().Be(2);
            labels.Should().Contain(new[] { "__label__оператор", "__label__выход" });

            var pred = fastText.PredictSingle("Позови оператора");

            pred.Probability.Should().BeGreaterThan(0);
            pred.Label.Should().Be("__label__оператор");

            // Collect every distinct non-label token of the training file. The label prefix is a
            // fixed ASCII marker, so it is matched ordinally rather than with culture rules.
            var sourceWords = File.ReadAllText("data.rus.txt")
                              .Split(new[] { " ", "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
                              .Where(x => !x.StartsWith("__label__", StringComparison.Ordinal))
                              .Distinct().ToArray();
            var nn = fastText.GetNearestNeighbours("оператор", 2);

            nn.Length.Should().Be(2);

            // Every neighbour must be a word that actually occurs in the training data.
            sourceWords.Should().Contain(nn.Select(x => x.Label));
            foreach (var prediction in nn)
            {
                prediction.Probability.Should().BeGreaterThan(0);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains a supervised model with a progress callback attached and checks that the
        /// callback fired, the model is usable, and both output files were written.
        /// </summary>
        public void CanTrainSupervisedWithProgressCallback()
        {
            using var wrapper = new FastTextWrapper();

            string modelBase    = Path.Combine(_tempDir, "cooking");
            string binFile      = modelBase + ".bin";
            string vecFile      = modelBase + ".vec";
            int    callbackHits = 0;

            var trainArgs = new SupervisedArgs
            {
                // Only the number of invocations matters; the reported values are ignored.
                TrainProgressCallback = (progress, loss, wst, lr, eta) => callbackHits++
            };

            wrapper.Supervised("cooking.train.txt", modelBase, trainArgs);

            callbackHits.Should().BeGreaterThan(0);
            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            wrapper.ModelPath.Should().Be(binFile);

            AssertLabels(wrapper.GetLabels());

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Trains a supervised model with default training and autotune arguments, then checks
        /// the resulting model, its output files, and the argument dump loaded from "_train.txt".
        /// </summary>
        public void CanTrainSupervised()
        {
            const string trainFile = "cooking.train.txt";

            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            var trainArgs    = new SupervisedArgs();
            var autotuneArgs = new AutotuneArgs();

            wrapper.Supervised(trainFile, modelBase, trainArgs, autotuneArgs, true);

            // The trained model is loaded and reports the expected shape and location.
            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            wrapper.ModelPath.Should().Be(binFile);

            AssertLabels(wrapper.GetLabels());

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();

            // The debug dump must echo the arguments both as passed in and as converted.
            var dump = DebugArgs.Load("_train.txt");

            AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
            AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(autotuneArgs, dump.ExternalTune);
            AssertAutotuneArgs(autotuneArgs, dump.ConvertedTune);

            dump.ExternalInput.Should().Be(trainFile);
            dump.ConvertedInput.Should().Be(trainFile);
            dump.ExternalOutput.Should().Be(modelBase);
            dump.ConvertedOutput.Should().Be(modelBase);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Loads the "dbpedia.ftz" model from <c>dataDir</c> and requests a sentence vector for a
 /// sample question. The vector itself is not inspected.
 /// </summary>
 public void Word2Vec()
 {
     using (var wrapper = new FastTextWrapper())
     {
         string modelFile = Path.Combine(dataDir, "dbpedia.ftz");

         wrapper.LoadModel(modelFile);

         var sentenceVector = wrapper.GetSentenceVector("Can I use a larger crockpot than the recipe calls for?");
     }
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Entry point: runs the supervised training sample followed by the model loading sample.
 /// </summary>
 /// <param name="args">Command-line arguments (unused).</param>
 static void Main(string[] args)
 {
     // This instance is not handed to the helpers below; it only lives for the duration
     // of the two calls and is disposed afterwards.
     using (var wrapper = new FastTextWrapper())
     {
         TrainSupervised();
         // TrainLowLevel() is currently disabled.
         LoadModel();
     }
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Trains a model from the hard-coded cooking data set using a default-constructed
 /// <c>FastTextArgs</c> instance.
 /// </summary>
 private static void TrainLowLevel()
 {
     using (var wrapper = new FastTextWrapper())
     {
         var defaultArgs = new FastTextArgs();

         wrapper.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking", defaultArgs);
     }
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Computes the cosine similarity between the sentence vector of <paramref name="src"/> and
 /// the sentence vector of every entry in <paramref name="dst"/>.
 /// </summary>
 /// <param name="src">Reference sentence.</param>
 /// <param name="dst">Sentences to compare against the reference.</param>
 /// <param name="model">Path of the model file to load.</param>
 /// <returns>One similarity per entry of <paramref name="dst"/>, in the same order.</returns>
 public static double[] Cosine(string src, string[] dst, string model)
 {
     using (var fastText = new FastTextWrapper())
     {
         fastText.LoadModel(model);

         // Lower-case with invariant rules so the text fed to the model does not depend on the
         // machine's current culture (e.g. Turkish dotless-i casing).
         var vector = fastText.GetSentenceVector(src.ToLowerInvariant());

         // ToArray() forces evaluation while the wrapper is still alive.
         return dst.Select(x => CalCosine(vector, fastText.GetSentenceVector(x.ToLowerInvariant()))).ToArray();
     }
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Trains a supervised model with fully customised training and autotune arguments, then
        /// checks the resulting model, its output files, and the argument dump in "_train.txt".
        /// </summary>
        public void CanAutotuneSupervisedModel()
        {
            const string trainFile = "cooking.train.txt";

            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            // Every value here is compared against the debug dump further down.
            var trainArgs = new SupervisedArgs
            {
                bucket        = 2100000,
                dim           = 250,
                epoch         = 10,
                loss          = LossName.HierarchicalSoftmax,
                lr            = 0.5,
                maxn          = 5,
                minn          = 2,
                neg           = 6,
                seed          = 42,
                t             = 0.0002,
                thread        = 10,
                verbose       = 1,
                ws            = 6,
                minCount      = 2,
                saveOutput    = true,
                wordNgrams    = 2,
                lrUpdateRate  = 110,
                minCountLabel = 1
            };

            var tuneArgs = new AutotuneArgs
            {
                Duration       = 30,
                Metric         = "precisionAtRecall:30",
                Predictions    = 2,
                ValidationFile = "cooking.valid.txt"
            };

            wrapper.Supervised(trainFile, modelBase, trainArgs, tuneArgs, true);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(250);
            wrapper.ModelPath.Should().Be(binFile);

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();

            // The debug dump must echo the arguments both as passed in and as converted.
            var dump = DebugArgs.Load("_train.txt");

            AssertSupervisedArgs(trainArgs, dump.ExternalArgs);
            AssertSupervisedArgs(trainArgs, dump.ConvertedArgs);
            AssertAutotuneArgs(tuneArgs, dump.ExternalTune);
            AssertAutotuneArgs(tuneArgs, dump.ConvertedTune);

            dump.ExternalInput.Should().Be(trainFile);
            dump.ConvertedInput.Should().Be(trainFile);
            dump.ExternalOutput.Should().Be(modelBase);
            dump.ConvertedOutput.Should().Be(modelBase);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Loads the model produced by the shared fixture into a fresh wrapper and checks that it
        /// is ready, has dimension 100 and exposes the expected labels.
        /// </summary>
        public void CanLoadSupervisedModel()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string fixtureModel = _fixture.FastText.ModelPath;
            wrapper.LoadModel(fixtureModel);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);

            AssertLabels(wrapper.GetLabels());
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Loads the hard-coded cooking model and exercises label listing, single and multiple
 /// prediction, and sentence-vector extraction with one sample question.
 /// The results are not inspected.
 /// </summary>
 private static void LoadModel()
 {
     const string question = "Can I use a larger crockpot than the recipe calls for?";

     using (var wrapper = new FastTextWrapper())
     {
         wrapper.LoadModel(@"D:\__Models\cooking.bin");

         var knownLabels    = wrapper.GetLabels();
         var bestMatch      = wrapper.PredictSingle(question);
         var topMatches     = wrapper.PredictMultiple(question, 4);
         var sentenceVector = wrapper.GetSentenceVector(question);
     }
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Trains a supervised model in a temporary directory, evaluates it on
        /// "cooking.valid.txt", logs the global metrics, plots the precision-recall curves and
        /// finally removes the temporary directory once the user presses a key.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                         .MinimumLevel.Debug()
                         .WriteTo.Console(theme: ConsoleTheme.None)
                         .CreateLogger();

            var log     = Log.ForContext <Program>();
            var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));

            Directory.CreateDirectory(tempDir);

            log.Information($"Temp dir: {tempDir}");

            string outPath = Path.Combine(tempDir, "cooking.bin");

            // Dispose the wrapper before the temporary directory is removed so nothing it owns
            // outlives the files it was trained into.
            using (var fastText = new FastTextWrapper(loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() })))
            {
                var ftArgs = FastTextArgs.SupervisedDefaults();

                ftArgs.epoch      = 15;
                ftArgs.lr         = 1;
                ftArgs.dim        = 300;
                ftArgs.wordNgrams = 2;
                ftArgs.minn       = 3;
                ftArgs.maxn       = 6;
                fastText.Supervised("cooking.train.txt", outPath, ftArgs);

                // Remove a stale debug file so the one read below is fresh
                // (presumably written by TestInternal in debug mode -- TODO confirm).
                // This stays best-effort, but a failure is now logged instead of vanishing.
                try
                {
                    File.Delete("_debug.txt");
                }
                catch (Exception ex)
                {
                    log.Warning(ex, "Could not delete stale _debug.txt");
                }

                var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);

                log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                                $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                                $"\n\tF1: {result.GlobalMetrics.GetF1()}");

                var curve = result.GetPrecisionRecallCurve();

                var(_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());

                string plotPath = PlotCurves(tempDir, new [] { curve, debugCurve });

                log.Information($"Precision-Recall plot: {plotPath}");
            }

            // Keep the temporary directory (and the plot in it) until the user is done.
            Console.WriteLine("\nPress any key to exit.");
            Console.ReadKey();

            Directory.Delete(tempDir, true);
        }
        /// <summary>
        /// Trains a supervised model through the legacy <c>Train</c> entry point and checks the
        /// labels and output files.
        /// </summary>
        public void CanTrainModelWithOldApi()
        {
            string outPath = Path.Combine(_tempDir, "cooking");

            // Dispose the wrapper deterministically, as the other tests do.
            using (var fastText = new FastTextWrapper(loggerFactory: _loggerFactory))
            {
                fastText.Train("cooking.train.txt", outPath, FastTextArgs.SupervisedDefaults());

                CheckLabels(fastText.GetLabels());

                File.Exists(outPath + ".bin").Should().BeTrue();
                File.Exists(outPath + ".vec").Should().BeTrue();
            }
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Trains a supervised model with customised defaults (25 epochs, learning rate 1.0,
 /// word 3-grams, verbosity 2, "__label__" prefix).
 /// </summary>
 /// <param name="fastText">Wrapper used for training.</param>
 /// <param name="trainFile">Path of the training data file.</param>
 /// <param name="modelFile">Output path passed to the trainer.</param>
 private static void TrainSupervised(FastTextWrapper fastText, string trainFile, string modelFile)
 {
     var trainingArgs = SupervisedArgs.SupervisedDefaults(options =>
     {
         options.Epochs       = 25;
         options.LearningRate = 1.0;
         options.WordNGrams   = 3;
         options.Verbose      = 2;
         options.LabelPrefix  = "__label__";
     });

     fastText.Train(trainFile, modelFile, trainingArgs);
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Verifies that supervised training is rejected with a <c>NativeLibraryException</c>
        /// when the pretrained vectors file is supplied without changing the args' dimension.
        /// </summary>
        public void CantTrainSupervisedWithPretrainedVectorsWithDifferentDimension()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");

            // The dimension is deliberately left untouched while pointing at the 300-d vectors.
            var trainArgs = new SupervisedArgs
            {
                PretrainedVectors = "cooking.unsup.300.vec"
            };

            wrapper.Invoking(w => w.Supervised("cooking.train.txt", modelBase, trainArgs))
                   .Should().Throw <NativeLibraryException>()
                   .WithMessage("Dimension of pretrained vectors (300) does not match dimension (100)!");
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Trains a CBOW model with default arguments and checks that the model is ready, has
        /// dimension 100, and that both output files were written.
        /// </summary>
        public void CanTrainCbowModel()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            wrapper.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", modelBase);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            wrapper.ModelPath.Should().Be(binFile);

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
        /// <summary>
        /// Creates a unique temporary directory, trains a supervised model into it with default
        /// arguments and verifies that the model and vector files exist.
        /// </summary>
        public SupervisedModelFixture()
        {
            string uniqueName = Guid.NewGuid().ToString("N");

            _tempDir = Path.Combine(Path.GetTempPath(), uniqueName);
            Directory.CreateDirectory(_tempDir);

            FastText = new FastTextWrapper();

            string modelBase = Path.Combine(_tempDir, "cooking");
            string vecFile   = modelBase + ".vec";

            FastText.Supervised("cooking.train.txt", modelBase, new SupervisedArgs());
            FastText.IsModelReady().Should().BeTrue();

            // The model file location is taken from the wrapper itself.
            File.Exists(FastText.ModelPath).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Trains a supervised model from the hard-coded cooking data set with customised defaults
 /// (25 epochs, learning rate 1.0, word 3-grams, verbosity 2, "__label__" prefix).
 /// </summary>
 private static void TrainSupervised()
 {
     using (var wrapper = new FastTextWrapper())
     {
         var trainingArgs = SupervisedArgs.SupervisedDefaults(options =>
         {
             options.Epochs       = 25;
             options.LearningRate = 1.0;
             options.WordNGrams   = 3;
             options.Verbose      = 2;
             options.LabelPrefix  = "__label__";
         });

         wrapper.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking", trainingArgs);
     }
 }
        /// <summary>
        /// Trains a supervised model with a wrapper created without a logger factory and checks
        /// the model state, labels and output files.
        /// </summary>
        public void CanTrainSupervisedWithNoLogging()
        {
            string outPath = Path.Combine(_tempDir, "cooking");

            // Dispose the wrapper deterministically, as the other tests do.
            using (var fastText = new FastTextWrapper())
            {
                fastText.Supervised("cooking.train.txt", outPath, FastTextArgs.SupervisedDefaults());

                fastText.IsModelReady().Should().BeTrue();
                fastText.GetModelDimension().Should().Be(100);

                CheckLabels(fastText.GetLabels());

                File.Exists(outPath + ".bin").Should().BeTrue();
                File.Exists(outPath + ".vec").Should().BeTrue();
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Trains a supervised model from the hard-coded cooking data set with customised
        /// defaults and runs a single prediction on a sample question.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var wrapper = new FastTextWrapper();

            var trainingArgs = TrainingArgs.SupervisedDefaults(options =>
            {
                options.Epochs        = 25;
                options.LearningRate  = 1.0;
                options.WordNGrams    = 3;
                options.Verbose       = 2;
                options.MinCharNGrams = 3;
                options.MaxCharNGrams = 6;
            });

            wrapper.Train(@"C:\_Models\cooking.train.txt", @"C:\_Models\cooking", trainingArgs);

            // Alternative to training: load an existing model instead.
            //wrapper.LoadModel(@"C:\_Models\fasttext.bin");
            var bestMatch = wrapper.PredictSingle("what is the difference between a new york strip and a bone-in new york cut sirloin ?");
        }
        /// <summary>
        /// Trains a supervised model seeded with 300-dimensional pretrained vectors and checks
        /// the model state, dimension, labels and output files.
        /// </summary>
        public void CanUsePretrainedVectorsForSupervisedModel()
        {
            string outPath = Path.Combine(_tempDir, "cooking");
            var    args    = FastTextArgs.SupervisedDefaults();

            // The model dimension is set to 300 alongside the 300-d pretrained vectors file.
            args.PretrainedVectors = "cooking.unsup.300.vec";
            args.dim = 300;

            // Dispose the wrapper deterministically, as the other tests do.
            using (var fastText = new FastTextWrapper(loggerFactory: _loggerFactory))
            {
                fastText.Supervised("cooking.train.txt", outPath, args);

                fastText.IsModelReady().Should().BeTrue();
                fastText.GetModelDimension().Should().Be(300);

                CheckLabels(fastText.GetLabels());

                File.Exists(outPath + ".bin").Should().BeTrue();
                File.Exists(outPath + ".vec").Should().BeTrue();
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Trains a supervised model using a relative output path and checks that the model and
        /// vector files are created relative to the current working directory.
        /// </summary>
        public void CanTrainSupervisedWithRelativeOutput()
        {
            try
            {
                using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);

                var args     = new SupervisedArgs();
                var tuneArgs = new AutotuneArgs();

                fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

                fastText.IsModelReady().Should().BeTrue();
                fastText.GetModelDimension().Should().Be(100);
                fastText.ModelPath.Should().Be("cooking.bin");

                AssertLabels(fastText.GetLabels());

                File.Exists("cooking.bin").Should().BeTrue();
                File.Exists("cooking.vec").Should().BeTrue();
            }
            finally
            {
                // These files are written to the working directory rather than the test's temp
                // directory, so remove them even when an assertion above fails.
                // File.Delete does not throw when the file does not exist.
                File.Delete("cooking.bin");
                File.Delete("cooking.vec");
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Loads the fixture's supervised model, quantizes it, and checks that the wrapper now
        /// points at a ".ftz" file next to the original model and that the files exist.
        /// </summary>
        public void CanQuantizeLoadedSupervisedModel()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string fixtureModel = _fixture.FastText.ModelPath;
            wrapper.LoadModel(fixtureModel);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);

            AssertLabels(wrapper.GetLabels());

            // Same directory and file name as the fixture model, minus the extension.
            string modelDir  = Path.GetDirectoryName(fixtureModel);
            string modelName = Path.GetFileNameWithoutExtension(fixtureModel);
            string modelBase = Path.Combine(modelDir, modelName);
            string ftzFile   = modelBase + ".ftz";
            string vecFile   = modelBase + ".vec";

            wrapper.Quantize();

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(100);
            wrapper.ModelPath.Should().Be(ftzFile);

            File.Exists(ftzFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Trains a supervised model seeded with 300-dimensional pretrained vectors and checks
        /// the model state, dimension, path, labels and output files.
        /// </summary>
        public void CanTrainSupervisedWithPretrainedVectors()
        {
            using var wrapper = new FastTextWrapper(loggerFactory: _loggerFactory);

            string modelBase = Path.Combine(_tempDir, "cooking");
            string binFile   = modelBase + ".bin";
            string vecFile   = modelBase + ".vec";

            // The model dimension is set to 300 alongside the 300-d pretrained vectors file.
            var trainArgs = new SupervisedArgs
            {
                PretrainedVectors = "cooking.unsup.300.vec",
                dim               = 300
            };

            wrapper.Supervised("cooking.train.txt", modelBase, trainArgs, new AutotuneArgs(), true);

            wrapper.IsModelReady().Should().BeTrue();
            wrapper.GetModelDimension().Should().Be(300);
            wrapper.ModelPath.Should().Be(binFile);

            AssertLabels(wrapper.GetLabels());

            File.Exists(binFile).Should().BeTrue();
            File.Exists(vecFile).Should().BeTrue();
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Loads the hard-coded dbpedia model once to fetch a sample sentence vector, then
        /// computes cosine similarities between a query sentence and a list of candidates.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string modelPath = Path.Combine(@"D:\SciSharp\CherubNLP\data", "dbpedia.bin");

            using (var wrapper = new FastTextWrapper())
            {
                wrapper.LoadModel(modelPath);
                var helloVector = wrapper.GetSentenceVector("Hello");
            }

            const string query = "Power Outage -Fifth & Park - JPMC150713";

            string[] candidates =
            {
                "Cosine Similarity algorithm function sample.",
                "Power Restored -Fifth & Park - JPMC150713",
                "Compute the similarity of two hardcoded lists.",
                "We can compute the similarity of two hardcoded lists.",
                "Coronavirus app could trace your contacts without sacrificing your privacy"
            };

            var similarities = Similarity.Cosine(query, candidates, modelPath);

            // Disabled sample:
            // var test = new KaggleTest();
            // test.SpookyAuthorIdentification();
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Command-line entry point. Dispatches on the first argument:
        /// "train" and "trainlowlevel" train a model from args[1] into args[2], "load" loads the
        /// model at args[2], and "nn" loads the model bytes from args[1] and runs the
        /// nearest-neighbour test. Prints the usage text when the arguments are insufficient or
        /// the command is not recognised.
        /// </summary>
        /// <param name="args">Command followed by its positional arguments.</param>
        static void Main(string[] args)
        {
            string command = args.FirstOrDefault();

            // "nn" needs one extra argument, every other command needs two.
            int requiredLength = command == "nn" ? 2 : 3;

            if (args.Length < requiredLength)
            {
                Console.WriteLine(Usage);
                return;
            }

            using (var fastText = new FastTextWrapper())
            {
                switch (command)
                {
                    case "train":
                        TrainSupervised(fastText, args[1], args[2]);
                        Test(fastText);
                        break;

                    case "trainlowlevel":
                        TrainLowLevel(fastText, args[1], args[2]);
                        Test(fastText);
                        break;

                    case "load":
                        // NOTE(review): the model path is read from args[2], mirroring the model
                        // position of the training commands -- confirm against the usage text.
                        fastText.LoadModel(args[2]);
                        Test(fastText);
                        break;

                    case "nn":
                        fastText.LoadModel(File.ReadAllBytes(args[1]));
                        TestNN(fastText);
                        break;

                    default:
                        // Previously an unknown command fell through and ran Test on a wrapper
                        // that had no model loaded; report the usage instead.
                        Console.WriteLine(Usage);
                        break;
                }
            }
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Trains a skip-gram and a CBOW model on the same data and checks that the nearest
        /// neighbours of "pot" overlap by less than half between each pair of the skip-gram,
        /// CBOW and fixture (supervised) models.
        /// </summary>
        public void SkipgramAndCBowLearnDifferentRepresentations()
        {
            const string trainFile  = "cooking.train.nolabels.txt";
            const string probeWord  = "pot";
            const int    neighbours = 10;

            using var skipGram = new FastTextWrapper(loggerFactory: _loggerFactory);
            string skipGramOut = Path.Combine(_tempDir, "cooking");

            skipGram.Unsupervised(UnsupervisedModel.SkipGram, trainFile, skipGramOut);

            using var cbow = new FastTextWrapper(loggerFactory: _loggerFactory);
            string cbowOut = Path.Combine(_tempDir, "cooking");

            cbow.Unsupervised(UnsupervisedModel.CBow, trainFile, cbowOut);

            var fromSkipGram   = skipGram.GetNearestNeighbours(probeWord, neighbours);
            var fromCbow       = cbow.GetNearestNeighbours(probeWord, neighbours);
            var fromSupervised = _fixture.FastText.GetNearestNeighbours(probeWord, neighbours);

            // Asserts that fewer than half of the labels in `first` also appear in `second`.
            void CheckPair(Prediction[] first, Prediction[] second)
            {
                int shared = first.Count(candidate => second.Any(other => other.Label == candidate.Label));

                shared.Should().BeLessThan(first.Length / 2);
            }

            CheckPair(fromSkipGram, fromCbow);
            CheckPair(fromSkipGram, fromSupervised);
            CheckPair(fromCbow, fromSupervised);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Verifies that a freshly constructed wrapper, with nothing trained or loaded,
        /// reports that its model is not ready.
        /// </summary>
        public void EmptyModelIsNotReady()
        {
            using var wrapper = new FastTextWrapper();

            bool ready = wrapper.IsModelReady();

            ready.Should().BeFalse();
        }