/// <summary>
/// Console demo: loads the pre-trained <c>cheeseDisease.bin</c> model, classifies every sample
/// returned by <c>GetTestData()</c>, prints one line per sample and finishes with a summary.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main2(string[] args)
{
    //Ensures that we are consistent against culture-specific number formatting, etc...
    CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");
    CultureInfo.DefaultThreadCurrentCulture = culture;
    CultureInfo.DefaultThreadCurrentUICulture = culture;
    Thread.CurrentThread.CurrentCulture = culture;
    Thread.CurrentThread.CurrentUICulture = culture;

    if (!File.Exists(@"cheeseDisease.bin"))
    {
        Console.WriteLine("Please train the model first using the console version of fastText and the data supplied in the SampleData folder");
        Console.WriteLine("fastText.exe supervised -input cheeseDisease.txt -output cheeseDisease");
        return;
    }

    Console.WriteLine("Loading model, please wait");
    var fastTextModel = new fastText(@"cheeseDisease.bin");
    try
    {
        Console.WriteLine("... done!");

        var tests = GetTestData();
        int correct = 0, mislabeled = 0, noLabel = 0;

        foreach (var test in tests)
        {
            var label = PredictTopLabel(fastTextModel, test.Text);
            if (label == "n/a")
            {
                // NOTE(review): this retries the exact same call with the same input;
                // presumably a debugging leftover - confirm and remove if so.
                label = PredictTopLabel(fastTextModel, test.Text);
            }

            Console.WriteLine($"{test.Text} -> P:{label} / C:{test.Label}");

            // Each sample lands in exactly one bucket, so the three counters always add up
            // to the number of samples (a sample whose expected label is "n/a" and that is
            // predicted as "n/a" counts as correct, not as "no label").
            if (label == test.Label)
            {
                correct++;
            }
            else if (label == "n/a")
            {
                noLabel++;
            }
            else
            {
                mislabeled++;
            }
        }

        Console.WriteLine($"Summary: {correct} correctly labeled, {mislabeled} mislabed, {noLabel} no labels found");
        Console.WriteLine("Press any key to finish!");
        Console.Read();
    }
    finally
    {
        // Release the model even when the evaluation loop throws.
        fastText.Release();
    }
}

/// <summary>
/// Returns the top predicted label for <paramref name="text"/> with the "__label__" prefix and
/// any remaining "__" removed, or "n/a" when the model returns no prediction (or a null label).
/// </summary>
private static string PredictTopLabel(fastText model, string text)
{
    // Project to the label first so FirstOrDefault yields null for an empty result
    // regardless of whether the prediction type is a class or a struct.
    var rawLabel = model.GetPrediction(text, 1).Select(p => p.label).FirstOrDefault();
    if (rawLabel == null)
    {
        return "n/a";
    }

    return rawLabel.Replace("__label__", "").Replace("__", "");
}
/// <summary>
/// Splits <paramref name="text"/> on spaces, asks the model for up to five candidate tags per
/// word, links the candidates of neighbouring words through every transition that
/// <c>SentenceGraph.IsTransitionAllowed</c> accepts, and returns the interpretations the graph
/// finds between the begin-of-sentence and end-of-sentence markers.
/// </summary>
/// <param name="fastTextModel">Model queried once per word.</param>
/// <param name="text">Sentence to analyse. Null, or text without any space-separated word, yields an empty list.</param>
/// <param name="numberOfPossibilities">Passed through unchanged to <c>GetAllPossibleSentenceInterpretations</c>.</param>
/// <returns>The interpretations produced by the graph, or an empty list when there is nothing to analyse.</returns>
public static List<SentenceInterpretation> IdentifyEntities(fastText fastTextModel, string text, int numberOfPossibilities)
{
    var paths = new List<SentenceInterpretation>();
    if (text == null)
    {
        return paths;
    }

    //IMPROVE TOKENIZATION HERE
    var words = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (words.Length == 0)
    {
        return paths;
    }

    // One list of candidate tokens per word, in sentence order.
    var tokens = new List<List<Token>>();
    foreach (var word in words)
    {
        var predictions = fastTextModel.GetPrediction(word, 5);
        if (predictions.Count == 0)
        {
            // No prediction for this word: fall back to a single "S_O" candidate.
            predictions.Add(new prediction() { label = "S_O", intensity = 1 });
        }

        // NOTE(review): the literal "S_" prefix presumably equals SentenceGraph.SingleTag + "_";
        // kept as in the original because SingleTag's value is not visible here.
        tokens.Add(predictions
            .Select(p => new Token(word, (p.label.Contains("_") ? p.label : "S_" + p.label), p.intensity))
            .ToList());
    }

    var graph = new SentenceGraph();
    foreach (var candidates in tokens)
    {
        graph.AddVertexRange(candidates);
    }
    //tokens.ForEach(p => Console.WriteLine("\t" + string.Join(" ", p.Select(t => t.Label + "[" + t.Intensity.ToString("0.0") + "]"))));

    // Connect every candidate of word i with every candidate of word i + 1.
    for (int i = 0; i < tokens.Count - 1; i++)
    {
        double maxIntensityC = tokens[i].Max(p => p.Intensity);
        double maxIntensityN = tokens[i + 1].Max(p => p.Intensity);

        foreach (var source in tokens[i])
        {
            string clabel = EnsureTagPrefix(source.Label);
            foreach (var dest in tokens[i + 1])
            {
                string nlabel = EnsureTagPrefix(dest.Label);
                if (!SentenceGraph.IsTransitionAllowed(clabel, nlabel))
                {
                    continue;
                }

                double probability = ComputeTransitionProbability(source.Intensity, maxIntensityC, dest.Intensity, maxIntensityN);
                graph.AddEdge(new SentenceEdge(source, dest, clabel, nlabel, probability));
                //Console.WriteLine($"\tFound {source.Value}[{clabel}] -> {dest.Value}[{nlabel}] with probablity {probability} and intensities {source.Intensity} and {dest.Intensity}");
            }
        }
    }

    // Artificial start/end vertices so that every interpretation shares one entry and one exit.
    var BoS = new Token("__BEGIN__", "", 1);
    var EoS = new Token("__END__", "", 1);
    graph.AddVertex(BoS);
    graph.AddVertex(EoS);

    foreach (var t in tokens.First())
    {
        string tlabel = EnsureTagPrefix(t.Label);
        if (SentenceGraph.IsTransitionAllowed("S_O", tlabel))
        {
            graph.AddEdge(new SentenceEdge(BoS, t, "S_O", tlabel, 1.0));
        }
    }

    foreach (var t in tokens.Last())
    {
        string tlabel = EnsureTagPrefix(t.Label);
        if (SentenceGraph.IsTransitionAllowed(tlabel, "S_O"))
        {
            graph.AddEdge(new SentenceEdge(t, EoS, tlabel, "S_O", 1.0));
        }
    }

    paths = graph.GetAllPossibleSentenceInterpretations(BoS, EoS, numberOfPossibilities);
    //paths.ForEach(p => Console.WriteLine("Probability: " + p.Probability.ToString("0.00") + "\t" + string.Join(" ", p.Sentence.Select(n => string.Join(" ", n.Tokens.Select(t => t.Value + "[" + t.Label + "]" ) ) ))));
    return paths;
}

/// <summary>
/// Returns <paramref name="label"/> unchanged when it already contains an underscore;
/// otherwise prefixes it with <c>SentenceGraph.SingleTag</c> and "_".
/// </summary>
private static string EnsureTagPrefix(string label)
{
    return label.Contains("_") ? label : SentenceGraph.SingleTag + "_" + label;
}

/// <summary>
/// Product of both intensities, each divided by the maximum intensity of its word.
/// Returns 0 when either intensity is negative or either maximum is not positive; the latter
/// avoids the NaN that 0 / 0 would otherwise produce.
/// </summary>
private static double ComputeTransitionProbability(double sourceIntensity, double sourceMax, double destIntensity, double destMax)
{
    if (sourceIntensity < 0 || destIntensity < 0 || sourceMax <= 0 || destMax <= 0)
    {
        return 0;
    }

    return (sourceIntensity / sourceMax) * (destIntensity / destMax);
}