/// <summary>
/// Interactive console loop for the NER model: loads the fastText model from a fixed path, then reads
/// one line of text at a time from standard input and prints up to five sentence interpretations
/// (probability followed by each token with its label).
/// </summary>
/// <param name="args">Command-line arguments; not used by this method.</param>
static void Main3(string[] args)
{
    //Ensures that we are consistent against culture-specific number formatting, etc...
    CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");
    CultureInfo.DefaultThreadCurrentCulture = culture;
    CultureInfo.DefaultThreadCurrentUICulture = culture;
    Thread.CurrentThread.CurrentCulture = culture;
    Thread.CurrentThread.CurrentUICulture = culture;

    string model = @"C:\IW\VS\PROJECTS\BOSON\Boson.Testing.CreateLuisApp\bin\x64\Debug\training\ner-caseinsensitive-fasttext";
    var fastTextModel = new fastText(model + ".bin");

    while (true)
    {
        var text = Console.ReadLine();

        // Console.ReadLine returns null once standard input is exhausted (Ctrl+Z / closed pipe).
        // IdentifyEntities splits the text immediately, so a null would crash; treat it as "quit".
        if (text == null)
        {
            break;
        }

        var paths = FastTextNER.SentenceGraph.IdentifyEntities(fastTextModel, text, 5);
        paths.ForEach(p => Console.WriteLine("Probability: " + p.Probability.ToString("0.00") + "\t" + string.Join(" ", p.Sentence.Select(n => string.Join(" ", n.Tokens.Select(t => t.Value + "[" + t.Label + "]"))))));
    }

    // Same cleanup call that Main2 performs once it is done with its model.
    fastText.Release();
}
/// <summary>
/// Evaluates the "cheeseDisease" classifier: loads <c>cheeseDisease.bin</c> from the working directory,
/// predicts a label for every item returned by <c>GetTestData()</c>, prints each prediction next to the
/// expected label and finally prints a summary of correct / mislabeled / unlabeled items.
/// If the model file does not exist, prints training instructions and returns without loading anything.
/// </summary>
/// <param name="args">Command-line arguments; not used by this method.</param>
static void Main2(string[] args)
{
    //Ensures that we are consistent against culture-specific number formatting, etc...
    CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");
    CultureInfo.DefaultThreadCurrentCulture = culture;
    CultureInfo.DefaultThreadCurrentUICulture = culture;
    Thread.CurrentThread.CurrentCulture = culture;
    Thread.CurrentThread.CurrentUICulture = culture;

    if (!File.Exists(@"cheeseDisease.bin"))
    {
        Console.WriteLine("Please train the model first using the console version of fastText and the data supplied in the SampleData folder");
        Console.WriteLine("fastText.exe supervised -input cheeseDisease.txt -output cheeseDisease");
        return;
    }

    Console.WriteLine("Loading model, please wait");
    var fastTextModel = new fastText(@"cheeseDisease.bin");
    Console.WriteLine("... done!");

    var tests = GetTestData();
    int correct = 0, mislabeled = 0, noLabel = 0;

    foreach (var test in tests)
    {
        var label = PredictTopLabel(fastTextModel, test.Text);
        if (label == "n/a")
        {
            // NOTE(review): the original code repeated the identical prediction call when the first
            // answer was "n/a"; kept as-is, but it looks redundant - confirm and remove if so.
            label = PredictTopLabel(fastTextModel, test.Text);
        }

        Console.WriteLine($"{test.Text} -> P:{label} / C:{test.Label}");

        bool isCorrect = label == test.Label;
        bool isMissing = label == "n/a";

        if (isCorrect)
        {
            correct++;
        }

        if (isMissing)
        {
            noLabel++;
        }

        // Counted directly instead of "fail - noLabel": the subtraction under-counts whenever an
        // "n/a" prediction also equals the expected label.
        if (!isCorrect && !isMissing)
        {
            mislabeled++;
        }
    }

    Console.WriteLine($"Summary: {correct} correctly labeled, {mislabeled} mislabed, {noLabel} no labels found");
    Console.WriteLine("Press any key to finish!");
    Console.Read();
    fastText.Release();
}

/// <summary>
/// Asks the model for its single best prediction for <paramref name="text"/> and strips the
/// "__label__" prefix and any remaining "__" from the label.
/// </summary>
/// <param name="fastTextModel">Loaded model to query.</param>
/// <param name="text">Text to classify.</param>
/// <returns>The cleaned-up label, or "n/a" when the model returns no prediction at all.</returns>
private static string PredictTopLabel(fastText fastTextModel, string text)
{
    var predictions = fastTextModel.GetPrediction(text, 1);

    // An empty result would make First() throw; report it as the "no label" marker instead.
    if (predictions.Count == 0)
    {
        return "n/a";
    }

    return predictions.First().label.Replace("__label__", "").Replace("__", "");
}
/// <summary>
/// Interactive word-similarity explorer: loads a fastText model (path without the ".bin" extension taken
/// from <c>args[0]</c> when supplied, otherwise a built-in default), then repeatedly prompts for a word and
/// prints its 20 most similar and 5 least similar words with their scores.
/// The loop ends when standard input is exhausted, after which the model is released.
/// </summary>
/// <param name="args">Optional: <c>args[0]</c> is the model path without the ".bin" extension.</param>
static void Main(string[] args)
{
    //Ensures that we are consistent against culture-specific number formatting, etc...
    //(same setup as Main2 / Main3; fully qualified so no extra using directive is required)
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.CreateSpecificCulture("en-US");
    System.Globalization.CultureInfo.DefaultThreadCurrentCulture = culture;
    System.Globalization.CultureInfo.DefaultThreadCurrentUICulture = culture;
    System.Threading.Thread.CurrentThread.CurrentCulture = culture;
    System.Threading.Thread.CurrentThread.CurrentUICulture = culture;

    string model = @"C:\BigData\NLPmodels\FastText\aviation-caseinsensitive";
    if (args.Length > 0)
    {
        model = args[0];
    }

    var fastTextModel = new fastText(model + @".bin");

    // NOTE(review): the result is not used below; the call is kept in case GetWords() has side
    // effects inside the wrapper - confirm and remove if it does not.
    var words = fastTextModel.GetWords();

    while (true)
    {
        Console.Write("\nWord: ");
        var w1 = Console.ReadLine();

        // Console.ReadLine returns null at end of input; leave the loop so the model gets released
        // instead of passing null on to the model.
        if (w1 == null)
        {
            break;
        }

        Console.WriteLine("\nMost similar:");
        fastTextModel.GetMostSimilar(w1, 20).ForEach(ws => Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00")));

        Console.WriteLine("\nLeast similar:");
        fastTextModel.GetLeastSimilar(w1, 5).ForEach(ws => Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00")));
    }

    fastText.Release();
}
/// <summary>
/// Builds a graph of label candidates for every whitespace-separated word of <paramref name="text"/> and
/// returns the sentence interpretations found between an artificial begin and end token.
/// </summary>
/// <remarks>
/// Each word is classified on its own with <c>GetPrediction(word, 5)</c>; when the model returns nothing,
/// a single "S_O" candidate with intensity 1 is used. Candidates of adjacent words are connected when
/// <c>SentenceGraph.IsTransitionAllowed</c> accepts the label pair. The edge probability is the product of
/// both intensities, each divided by the highest intensity among that word's candidates.
/// </remarks>
/// <param name="fastTextModel">Loaded model used to predict labels for each word.</param>
/// <param name="text">Sentence to analyse; null, empty or whitespace-only text yields an empty list.</param>
/// <param name="numberOfPossibilities">
/// Passed through to <c>GetAllPossibleSentenceInterpretations</c>; presumably the maximum number of
/// interpretations to return - confirm against that method.
/// </param>
/// <returns>The interpretations produced by the graph, or an empty list when there are no words.</returns>
public static List<SentenceInterpretation> IdentifyEntities(fastText fastTextModel, string text, int numberOfPossibilities)
{
    var paths = new List<SentenceInterpretation>();

    // Callers feed this straight from Console.ReadLine, which yields null at end of input.
    if (text == null)
    {
        return paths;
    }

    var words = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList(); //IMPROVE TOKENIZATION HERE
    if (words.Count == 0)
    {
        return paths;
    }

    // One list of label candidates per word, in sentence order.
    var tokens = new List<List<Token>>();
    foreach (var w in words)
    {
        var pred = fastTextModel.GetPrediction(w, 5);
        if (pred.Count == 0)
        {
            pred.Add(new prediction() { label = "S_O", intensity = 1 });
        }

        tokens.Add(pred.Select(p => new Token(w, (p.label.Contains("_") ? p.label : "S_" + p.label), p.intensity)).ToList());
    }

    var graph = new SentenceGraph();
    tokens.ForEach(t => graph.AddVertexRange(t));

    // Connect every candidate of word i with every candidate of word i + 1 when the transition is allowed.
    for (int i = 0; i < (words.Count - 1); i++)
    {
        double maxIntensityC = tokens[i].Max(p => p.Intensity);
        double maxIntensityN = tokens[i + 1].Max(p => p.Intensity);

        foreach (var source in tokens[i])
        {
            foreach (var dest in tokens[i + 1])
            {
                string clabel = EnsureTagPrefix(source.Label);
                string nlabel = EnsureTagPrefix(dest.Label);

                if (!SentenceGraph.IsTransitionAllowed(clabel, nlabel))
                {
                    continue;
                }

                // Negative intensities count as probability 0. A non-positive maximum is treated the
                // same way, because dividing by it would yield NaN (0 / 0) and poison path probabilities.
                bool usable = maxIntensityC > 0 && maxIntensityN > 0 && source.Intensity >= 0 && dest.Intensity >= 0;
                double probability = usable
                    ? (source.Intensity / maxIntensityC) * (dest.Intensity / maxIntensityN)
                    : 0;

                graph.AddEdge(new SentenceEdge(source, dest, clabel, nlabel, probability));
            }
        }
    }

    // Artificial sentence boundaries so that every interpretation is a path from BoS to EoS.
    var BoS = new Token("__BEGIN__", "", 1);
    var EoS = new Token("__END__", "", 1);
    graph.AddVertex(BoS);
    graph.AddVertex(EoS);

    foreach (var t in tokens.First())
    {
        string tlabel = EnsureTagPrefix(t.Label);
        if (SentenceGraph.IsTransitionAllowed("S_O", tlabel))
        {
            graph.AddEdge(new SentenceEdge(BoS, t, "S_O", tlabel, 1.0));
        }
    }

    foreach (var t in tokens.Last())
    {
        string tlabel = EnsureTagPrefix(t.Label);
        if (SentenceGraph.IsTransitionAllowed(tlabel, "S_O"))
        {
            graph.AddEdge(new SentenceEdge(t, EoS, tlabel, "S_O", 1.0));
        }
    }

    paths = graph.GetAllPossibleSentenceInterpretations(BoS, EoS, numberOfPossibilities);
    return paths;
}

/// <summary>
/// Returns <paramref name="label"/> unchanged when it already contains an underscore; otherwise prefixes
/// it with <c>SentenceGraph.SingleTag</c> and an underscore.
/// </summary>
private static string EnsureTagPrefix(string label)
{
    return label.Contains("_") ? label : SentenceGraph.SingleTag + "_" + label;
}