// Exercises the basic inference API of a trained model: labels,
// single prediction, top-N predictions and a sentence vector.
private static void Test(FastTextWrapper fastText)
{
    const string question = "Can I use a larger crockpot than the recipe calls for?";

    var labels = fastText.GetLabels();
    var prediction = fastText.PredictSingle(question);
    var predictions = fastText.PredictMultiple(question, 4);
    var vector = fastText.GetSentenceVector(question);
}
/// <summary>
/// Sample entry point: configures Serilog, then trains a supervised fastText
/// classifier into a freshly created temp directory.
/// </summary>
static void Main(string[] args)
{
    Log.Logger = new LoggerConfiguration()
        .MinimumLevel.Debug()
        .WriteTo.Console(theme: ConsoleTheme.None)
        .CreateLogger();

    var log = Log.ForContext<Program>();

    // The temp directory is logged (and intentionally not deleted) so the
    // trained model can be inspected after the run.
    var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
    Directory.CreateDirectory(tempDir);
    log.Information($"Temp dir: {tempDir}");

    string outPath = Path.Combine(tempDir, "cooking.bin");

    // Fix: the wrapper owns native resources and implements IDisposable
    // (every other usage site disposes it) — dispose it here as well.
    using var fastText = new FastTextWrapper(
        loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() }));

    var ftArgs = FastTextArgs.SupervisedDefaults();
    ftArgs.epoch = 15;
    ftArgs.lr = 1;
    ftArgs.dim = 300;
    ftArgs.wordNgrams = 2;
    ftArgs.minn = 3;
    ftArgs.maxn = 6;

    fastText.Supervised("cooking.train.txt", outPath, ftArgs);
}
// Verifies that unsupervised CBow training invokes the progress callback
// and produces a ready model with the default dimension and output files.
public void CanTrainCbowWithProgressCallback()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    // Count how many times the native trainer reports progress.
    int progressReports = 0;
    var args = new UnsupervisedArgs
    {
        TrainProgressCallback = (progress, loss, wst, lr, eta) => progressReports++
    };

    fastText.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", outPath, args);

    progressReports.Should().BeGreaterThan(0);
    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Trains on a Cyrillic corpus and verifies that labels, predictions and
// nearest neighbours round-trip through the native layer as valid UTF-8.
public void CanHandleUtf8()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "rus");

    fastText.Supervised("data.rus.txt", outPath, new SupervisedArgs());

    var labels = fastText.GetLabels();
    labels.Length.Should().Be(2);
    labels.Should().Contain(new[] { "__label__оператор", "__label__выход" });

    var prediction = fastText.PredictSingle("Позови оператора");
    prediction.Probability.Should().BeGreaterThan(0);
    prediction.Label.Should().Be("__label__оператор");

    // Every nearest neighbour must be one of the distinct corpus words
    // (labels excluded).
    var sourceWords = File.ReadAllText("data.rus.txt")
        .Split(new[] { " ", "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
        .Where(x => !x.StartsWith("__label__"))
        .Distinct()
        .ToArray();

    var neighbours = fastText.GetNearestNeighbours("оператор", 2);
    neighbours.Length.Should().Be(2);
    sourceWords.Should().Contain(neighbours.Select(x => x.Label));

    foreach (var neighbour in neighbours)
    {
        neighbour.Probability.Should().BeGreaterThan(0);
    }
}
// Verifies that supervised training invokes the progress callback and
// produces a ready model with the default dimension and output files.
public void CanTrainSupervisedWithProgressCallback()
{
    // Consistency fix: pass the test logger factory like every other
    // logging-enabled test does (only the explicit "NoLogging" test omits it).
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    // Count how many times the native trainer reports progress.
    int callNum = 0;
    var args = new SupervisedArgs
    {
        TrainProgressCallback = (progress, loss, wst, lr, eta) => callNum++
    };

    fastText.Supervised("cooking.train.txt", outPath, args);

    callNum.Should().BeGreaterThan(0);
    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    AssertLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// End-to-end supervised training with the debug-args dump enabled:
// checks model state, output files, and that the args recorded by the
// native layer match what was passed in.
public void CanTrainSupervised()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    // Trailing `true` enables the debug dump consumed below via DebugArgs.Load.
    fastText.Supervised("cooking.train.txt", outPath, args, tuneArgs, true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    AssertLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();

    var debugArgs = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(args, debugArgs.ExternalArgs);
    AssertSupervisedArgs(args, debugArgs.ConvertedArgs);
    AssertAutotuneArgs(tuneArgs, debugArgs.ExternalTune);
    AssertAutotuneArgs(tuneArgs, debugArgs.ConvertedTune);

    debugArgs.ExternalInput.Should().Be("cooking.train.txt");
    debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
    debugArgs.ExternalOutput.Should().Be(outPath);
    debugArgs.ConvertedOutput.Should().Be(outPath);
}
// Loads a prebuilt quantized model (.ftz) and computes one sentence vector.
public void Word2Vec()
{
    using (var fastText = new FastTextWrapper())
    {
        fastText.LoadModel(Path.Combine(dataDir, "dbpedia.ftz"));
        var sentenceVector = fastText.GetSentenceVector("Can I use a larger crockpot than the recipe calls for?");
    }
}
// Sample entry point: trains a supervised model, then reloads it for inference.
// NOTE(review): the wrapper constructed here is never passed to the helpers —
// TrainSupervised() and LoadModel() create their own instances; confirm this
// outer instance is intentional.
static void Main(string[] args)
{
    using (var fastText = new FastTextWrapper())
    {
        TrainSupervised();
        //TrainLowLevel();
        LoadModel();
    }
}
// Trains through the low-level Train() API with all-default FastTextArgs.
private static void TrainLowLevel()
{
    using (var fastText = new FastTextWrapper())
    {
        var defaultArgs = new FastTextArgs();
        fastText.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking", defaultArgs);
    }
}
/// <summary>
/// Computes the cosine similarity between <paramref name="src"/> and each
/// sentence in <paramref name="dst"/> using sentence vectors from a fastText model.
/// </summary>
/// <param name="src">Source sentence.</param>
/// <param name="dst">Candidate sentences to compare against.</param>
/// <param name="model">Path to the fastText model file.</param>
/// <returns>One cosine score per element of <paramref name="dst"/>, in order.</returns>
public static double[] Cosine(string src, string[] dst, string model)
{
    using (var fastText = new FastTextWrapper())
    {
        fastText.LoadModel(model);

        // Fix: use invariant-culture lower-casing so results do not depend on
        // the current thread culture (e.g. the Turkish dotless-i mapping).
        var vector = fastText.GetSentenceVector(src.ToLowerInvariant());
        return dst
            .Select(x => CalCosine(vector, fastText.GetSentenceVector(x.ToLowerInvariant())))
            .ToArray();
    }
}
// Runs supervised training with hyperparameter autotuning enabled and verifies
// that the model comes out ready, files are written, and the exact argument
// values round-trip through the native layer (via the debug-args dump).
public void CanAutotuneSupervisedModel()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    // Deliberately non-default values for every field, so the debug-dump
    // comparison below would catch any field lost or mangled in marshalling.
    var args = new SupervisedArgs
    {
        bucket = 2100000,
        dim = 250,
        epoch = 10,
        loss = LossName.HierarchicalSoftmax,
        lr = 0.5,
        maxn = 5,
        minn = 2,
        neg = 6,
        seed = 42,
        t = 0.0002,
        thread = 10,
        verbose = 1,
        ws = 6,
        minCount = 2,
        saveOutput = true,
        wordNgrams = 2,
        lrUpdateRate = 110,
        minCountLabel = 1
    };

    // 30-second autotune budget against a held-out validation file.
    var autotuneArgs = new AutotuneArgs
    {
        Duration = 30,
        Metric = "precisionAtRecall:30",
        Predictions = 2,
        ValidationFile = "cooking.valid.txt"
    };

    // Trailing `true` enables the debug dump consumed below via DebugArgs.Load.
    fastText.Supervised("cooking.train.txt", outPath, args, autotuneArgs, true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(250);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();

    var debugArgs = DebugArgs.Load("_train.txt");

    AssertSupervisedArgs(args, debugArgs.ExternalArgs);
    AssertSupervisedArgs(args, debugArgs.ConvertedArgs);
    AssertAutotuneArgs(autotuneArgs, debugArgs.ExternalTune);
    AssertAutotuneArgs(autotuneArgs, debugArgs.ConvertedTune);

    debugArgs.ExternalInput.Should().Be("cooking.train.txt");
    debugArgs.ConvertedInput.Should().Be("cooking.train.txt");
    debugArgs.ExternalOutput.Should().Be(outPath);
    debugArgs.ConvertedOutput.Should().Be(outPath);
}
// Reloads the model trained by the shared fixture and verifies its metadata.
public void CanLoadSupervisedModel()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    fastText.LoadModel(_fixture.FastText.ModelPath);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    AssertLabels(fastText.GetLabels());
}
// Loads a previously trained model from disk and runs the inference API
// against a sample question.
private static void LoadModel()
{
    const string question = "Can I use a larger crockpot than the recipe calls for?";

    using (var fastText = new FastTextWrapper())
    {
        fastText.LoadModel(@"D:\__Models\cooking.bin");

        var labels = fastText.GetLabels();
        var single = fastText.PredictSingle(question);
        var topFour = fastText.PredictMultiple(question, 4);
        var sentenceVector = fastText.GetSentenceVector(question);
    }
}
/// <summary>
/// Sample entry point: trains a supervised model, evaluates it on a validation
/// file, and plots the precision-recall curve (native vs. debug-computed).
/// </summary>
static void Main(string[] args)
{
    Log.Logger = new LoggerConfiguration()
        .MinimumLevel.Debug()
        .WriteTo.Console(theme: ConsoleTheme.None)
        .CreateLogger();

    var log = Log.ForContext<Program>();

    var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
    Directory.CreateDirectory(tempDir);
    log.Information($"Temp dir: {tempDir}");

    string outPath = Path.Combine(tempDir, "cooking.bin");

    // Fix: the wrapper owns native resources and implements IDisposable
    // (every other usage site disposes it) — dispose it here as well.
    using var fastText = new FastTextWrapper(
        loggerFactory: new LoggerFactory(new[] { new SerilogLoggerProvider() }));

    var ftArgs = FastTextArgs.SupervisedDefaults();
    ftArgs.epoch = 15;
    ftArgs.lr = 1;
    ftArgs.dim = 300;
    ftArgs.wordNgrams = 2;
    ftArgs.minn = 3;
    ftArgs.maxn = 6;

    fastText.Supervised("cooking.train.txt", outPath, ftArgs);

    // Best-effort removal of a stale debug dump from a previous run.
    try
    {
        File.Delete("_debug.txt");
    }
    catch
    {
    }

    var result = fastText.TestInternal("cooking.valid.txt", 1, 0.0f, true);
    log.Information($"Results:\n\tPrecision: {result.GlobalMetrics.GetPrecision()}" +
                    $"\n\tRecall: {result.GlobalMetrics.GetRecall()}" +
                    $"\n\tF1: {result.GlobalMetrics.GetF1()}");

    // Compare the curve computed by the native library with the one
    // reconstructed from the debug dump.
    var curve = result.GetPrecisionRecallCurve();
    var (_, debugCurve) = TestResult.LoadDebugResult("_debug.txt", fastText.GetLabels());

    string plotPath = PlotCurves(tempDir, new[] { curve, debugCurve });
    log.Information($"Precision-Recall plot: {plotPath}");

    Console.WriteLine("\nPress any key to exit.");
    Console.ReadKey();
    Directory.Delete(tempDir, true);
}
// Trains through the legacy Train() API and verifies labels and output files.
public void CanTrainModelWithOldApi()
{
    // Fix: dispose the wrapper like every other test — it owns native resources.
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    fastText.Train("cooking.train.txt", outPath, FastTextArgs.SupervisedDefaults());

    CheckLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Trains a supervised classifier with defaults tuned for the cooking corpus.
private static void TrainSupervised(FastTextWrapper fastText, string trainFile, string modelFile)
{
    var args = SupervisedArgs.SupervisedDefaults(x =>
    {
        x.Epochs = 25;
        x.LearningRate = 1.0;
        x.WordNGrams = 3;
        x.Verbose = 2;
        x.LabelPrefix = "__label__";
    });

    fastText.Train(trainFile, modelFile, args);
}
// Supervised training must fail with a clear native error when the pretrained
// vectors' dimension (300) differs from the model dimension (default 100).
public void CantTrainSupervisedWithPretrainedVectorsWithDifferentDimension()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    var args = new SupervisedArgs
    {
        PretrainedVectors = "cooking.unsup.300.vec"
    };

    fastText.Invoking(x => x.Supervised("cooking.train.txt", outPath, args))
        .Should().Throw<NativeLibraryException>()
        .WithMessage("Dimension of pretrained vectors (300) does not match dimension (100)!");
}
// Unsupervised CBow training with default args: model becomes ready with the
// default dimension and both output files are written.
public void CanTrainCbowModel()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    fastText.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", outPath);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Trains one shared supervised model used by all tests in the collection.
// The wrapper is deliberately kept alive in the FastText property; it is not
// disposed here.
public SupervisedModelFixture()
{
    _tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
    Directory.CreateDirectory(_tempDir);

    FastText = new FastTextWrapper();
    string outPath = Path.Combine(_tempDir, "cooking");
    FastText.Supervised("cooking.train.txt", outPath, new SupervisedArgs());

    FastText.IsModelReady().Should().BeTrue();
    File.Exists(FastText.ModelPath).Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Trains a supervised classifier on the local cooking corpus with
// defaults tuned for classification.
private static void TrainSupervised()
{
    using (var fastText = new FastTextWrapper())
    {
        var args = SupervisedArgs.SupervisedDefaults(x =>
        {
            x.Epochs = 25;
            x.LearningRate = 1.0;
            x.WordNGrams = 3;
            x.Verbose = 2;
            x.LabelPrefix = "__label__";
        });

        fastText.Train(@"D:\__Models\cooking.train.txt", @"D:\__Models\cooking", args);
    }
}
// Training must work when no logger factory is supplied ("no logging" refers
// to the logger, not to resource management).
public void CanTrainSupervisedWithNoLogging()
{
    // Fix: dispose the wrapper like every other test — it owns native resources.
    using var fastText = new FastTextWrapper();
    string outPath = Path.Combine(_tempDir, "cooking");

    fastText.Supervised("cooking.train.txt", outPath, FastTextArgs.SupervisedDefaults());

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    CheckLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Sample entry point: trains a supervised model (or, commented out, loads an
// existing one) and runs a single prediction.
static void Main(string[] args)
{
    var fastText = new FastTextWrapper();

    var trainingArgs = TrainingArgs.SupervisedDefaults(x =>
    {
        x.Epochs = 25;
        x.LearningRate = 1.0;
        x.WordNGrams = 3;
        x.Verbose = 2;
        x.MinCharNGrams = 3;
        x.MaxCharNGrams = 6;
    });

    fastText.Train(@"C:\_Models\cooking.train.txt", @"C:\_Models\cooking", trainingArgs);

    //fastText.LoadModel(@"C:\_Models\fasttext.bin");
    var prediction = fastText.PredictSingle("what is the difference between a new york strip and a bone-in new york cut sirloin ?");
}
// Supervised training with 300-dim pretrained vectors must succeed when the
// model dimension is set to match.
public void CanUsePretrainedVectorsForSupervisedModel()
{
    // Fix: dispose the wrapper like every other test — it owns native resources.
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    var args = FastTextArgs.SupervisedDefaults();
    args.PretrainedVectors = "cooking.unsup.300.vec";
    args.dim = 300; // must equal the pretrained vectors' dimension

    fastText.Supervised("cooking.train.txt", outPath, args);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(300);
    CheckLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// A relative output path must produce model files in the current directory.
public void CanTrainSupervisedWithRelativeOutput()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    var args = new SupervisedArgs();
    var tuneArgs = new AutotuneArgs();

    // Fix: clean up the files written to the current directory even when an
    // assertion fails, so a failing run doesn't pollute later tests.
    // (File.Delete is a no-op for missing files.)
    try
    {
        fastText.Supervised("cooking.train.txt", "cooking", args, tuneArgs, true);

        fastText.IsModelReady().Should().BeTrue();
        fastText.GetModelDimension().Should().Be(100);
        fastText.ModelPath.Should().Be("cooking.bin");
        AssertLabels(fastText.GetLabels());
        File.Exists("cooking.bin").Should().BeTrue();
        File.Exists("cooking.vec").Should().BeTrue();
    }
    finally
    {
        File.Delete("cooking.bin");
        File.Delete("cooking.vec");
    }
}
// Loads the fixture's supervised model, quantizes it, and verifies the
// wrapper now points at an .ftz file with the same base name.
public void CanQuantizeLoadedSupervisedModel()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    fastText.LoadModel(_fixture.FastText.ModelPath);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    AssertLabels(fastText.GetLabels());

    // Base path of the loaded model without its extension; Quantize() writes
    // the .ftz next to the original .bin.
    string basePath = Path.Combine(
        Path.GetDirectoryName(_fixture.FastText.ModelPath),
        Path.GetFileNameWithoutExtension(_fixture.FastText.ModelPath));

    fastText.Quantize();

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(100);
    fastText.ModelPath.Should().Be(basePath + ".ftz");
    File.Exists(basePath + ".ftz").Should().BeTrue();
    File.Exists(basePath + ".vec").Should().BeTrue();
}
// Supervised training through the new API with 300-dim pretrained vectors
// and matching model dimension.
public void CanTrainSupervisedWithPretrainedVectors()
{
    using var fastText = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outPath = Path.Combine(_tempDir, "cooking");

    var args = new SupervisedArgs
    {
        PretrainedVectors = "cooking.unsup.300.vec",
        dim = 300 // must equal the pretrained vectors' dimension
    };

    fastText.Supervised("cooking.train.txt", outPath, args, new AutotuneArgs(), true);

    fastText.IsModelReady().Should().BeTrue();
    fastText.GetModelDimension().Should().Be(300);
    fastText.ModelPath.Should().Be(outPath + ".bin");
    AssertLabels(fastText.GetLabels());
    File.Exists(outPath + ".bin").Should().BeTrue();
    File.Exists(outPath + ".vec").Should().BeTrue();
}
// Sample entry point: loads a prebuilt dbpedia model, computes one vector,
// then runs pairwise cosine similarities against a few hardcoded sentences.
static void Main(string[] args)
{
    var model = Path.Combine(@"D:\SciSharp\CherubNLP\data", "dbpedia.bin");

    using (var fastText = new FastTextWrapper())
    {
        fastText.LoadModel(model);
        var vector1 = fastText.GetSentenceVector("Hello");
    }

    var candidates = new[]
    {
        "Cosine Similarity algorithm function sample.",
        "Power Restored -Fifth & Park - JPMC150713",
        "Compute the similarity of two hardcoded lists.",
        "We can compute the similarity of two hardcoded lists.",
        "Coronavirus app could trace your contacts without sacrificing your privacy"
    };
    var similarities = Similarity.Cosine("Power Outage -Fifth & Park - JPMC150713", candidates, model);

    // var test = new KaggleTest();
    // test.SpookyAuthorIdentification();
}
// CLI entry point. Modes: "train", "trainlowlevel", "load", "nn".
// "nn" mode requires at least 2 arguments, every other mode at least 3.
static void Main(string[] args)
{
    if ((args.FirstOrDefault() == "nn" && args.Length < 2) || (args.FirstOrDefault() != "nn" && args.Length < 3))
    {
        Console.WriteLine(Usage);
        return;
    }

    using (var fastText = new FastTextWrapper())
    {
        switch (args[0])
        {
            case "train":
                TrainSupervised(fastText, args[1], args[2]);
                break;
            case "trainlowlevel":
                TrainLowLevel(fastText, args[1], args[2]);
                break;
            case "load":
                // NOTE(review): loads from args[2], not args[1] — confirm against
                // Usage that the expected shape is "load <x> <model>" and this is
                // not an off-by-one in the argument index.
                fastText.LoadModel(args[2]);
                break;
        }

        if (args[0] != "nn")
        {
            Test(fastText);
        }
        else
        {
            // "nn" mode loads the model from raw bytes and runs the
            // nearest-neighbour test path.
            fastText.LoadModel(File.ReadAllBytes(args[1]));
            TestNN(fastText);
        }
    }
}
// SkipGram, CBow and the fixture's supervised model should each learn
// different word representations: their nearest-neighbour sets for the same
// word must overlap in fewer than half of the entries.
public void SkipgramAndCBowLearnDifferentRepresentations()
{
    using var sg = new FastTextWrapper(loggerFactory: _loggerFactory);
    // Fix: use distinct output paths — previously both trainings wrote to
    // "cooking", so the CBow run overwrote the SkipGram .bin/.vec files.
    string outSG = Path.Combine(_tempDir, "cooking-sg");
    sg.Unsupervised(UnsupervisedModel.SkipGram, "cooking.train.nolabels.txt", outSG);

    using var cbow = new FastTextWrapper(loggerFactory: _loggerFactory);
    string outCbow = Path.Combine(_tempDir, "cooking-cbow");
    cbow.Unsupervised(UnsupervisedModel.CBow, "cooking.train.nolabels.txt", outCbow);

    var nnSg = sg.GetNearestNeighbours("pot", 10);
    var nnCbow = cbow.GetNearestNeighbours("pot", 10);
    var nnSup = _fixture.FastText.GetNearestNeighbours("pot", 10);

    // Counts label overlap between two prediction sets; we want less than
    // a half of same predictions.
    void CheckPair(Prediction[] first, Prediction[] second)
    {
        int samePredictions = first.Count(p => second.Any(x => x.Label == p.Label));
        samePredictions.Should().BeLessThan(first.Length / 2);
    }

    CheckPair(nnSg, nnCbow);
    CheckPair(nnSg, nnSup);
    CheckPair(nnCbow, nnSup);
}
// A freshly constructed wrapper with no model trained or loaded reports
// that it is not ready.
public void EmptyModelIsNotReady()
{
    using var fastText = new FastTextWrapper();
    fastText.IsModelReady().Should().BeFalse();
}