Exemplo n.º 1
0
        /// <summary>
        /// Loads the pre-trained <c>cheeseDisease.bin</c> model, runs it over every sample returned by
        /// <c>GetTestData()</c>, prints the predicted vs. expected label for each sample and finally a
        /// summary of correct, mislabeled and unlabeled samples.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main2(string[] args)
        {
            //Ensures that we are consistent against culture-specific number formatting, etc...
            CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");

            CultureInfo.DefaultThreadCurrentCulture   = culture;
            CultureInfo.DefaultThreadCurrentUICulture = culture;

            Thread.CurrentThread.CurrentCulture   = culture;
            Thread.CurrentThread.CurrentUICulture = culture;

            if (!File.Exists(@"cheeseDisease.bin"))
            {
                Console.WriteLine("Please train the model first using the console version of fastText and the data supplied in the SampleData folder");
                Console.WriteLine("fastText.exe supervised -input cheeseDisease.txt -output cheeseDisease");
                return;
            }

            Console.WriteLine("Loading model, please wait");
            var fastTextModel = new fastText(@"cheeseDisease.bin");

            try
            {
                Console.WriteLine("... done!");

                var tests = GetTestData();
                int correct = 0, mislabeled = 0, noLabel = 0;

                foreach (var test in tests)
                {
                    // Project to the label string before taking the first element so that an empty
                    // prediction list yields null instead of making First() throw.
                    string rawLabel = fastTextModel.GetPrediction(test.Text, 1)
                                                   .Select(p => p.label)
                                                   .FirstOrDefault();

                    string label = rawLabel == null
                        ? "n/a"
                        : rawLabel.Replace("__label__", "").Replace("__", "");

                    Console.WriteLine($"{test.Text} -> P:{label} / C:{test.Label}");

                    bool isCorrect = label == test.Label;
                    bool isMissing = label == "n/a";

                    if (isCorrect)
                    {
                        correct++;
                    }

                    if (isMissing)
                    {
                        noLabel++;
                    }

                    // Count real mislabels directly; deriving them as "failures - noLabel" goes
                    // negative when the expected label itself is "n/a".
                    if (!isCorrect && !isMissing)
                    {
                        mislabeled++;
                    }
                }

                Console.WriteLine($"Summary: {correct} correctly labeled, {mislabeled} mislabed, {noLabel} no labels found");
                Console.WriteLine("Press any key to finish!");
                Console.Read();
            }
            finally
            {
                // Release even when evaluation throws, so the loaded model is not left behind.
                fastText.Release();
            }
        }
Exemplo n.º 2
0
            /// <summary>
            /// Splits <paramref name="text"/> on spaces, asks the model for up to five label predictions per
            /// word, links the candidate tokens of neighbouring words in a <c>SentenceGraph</c> wherever
            /// <c>SentenceGraph.IsTransitionAllowed</c> permits it, and returns the interpretations the graph
            /// reports between the artificial begin and end tokens.
            /// </summary>
            /// <param name="fastTextModel">Model queried via <c>GetPrediction(word, 5)</c> for every word.</param>
            /// <param name="text">Sentence to analyse; null, empty or all-space input yields an empty list.</param>
            /// <param name="numberOfPossibilities">Passed unchanged to <c>GetAllPossibleSentenceInterpretations</c>.</param>
            /// <returns>The interpretations produced by the graph, or an empty list when there are no words.</returns>
            public static List<SentenceInterpretation> IdentifyEntities(fastText fastTextModel, string text, int numberOfPossibilities)
            {
                var paths = new List<SentenceInterpretation>();

                // Guard against null so Split below cannot throw a NullReferenceException.
                if (string.IsNullOrEmpty(text))
                {
                    return paths;
                }

                var words = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList(); //IMPROVE TOKENIZATION HERE

                if (words.Count == 0)
                {
                    return paths;
                }

                // Labels without an underscore get the single-tag prefix before transitions are checked.
                Func<string, string> withTagPrefix =
                    label => label.Contains("_") ? label : SentenceGraph.SingleTag + "_" + label;

                var tokens = new List<List<Token>>();

                foreach (var w in words)
                {
                    var pred = fastTextModel.GetPrediction(w, 5);

                    if (pred.Count == 0)
                    {
                        // No prediction for this word: fall back to a single "S_O" candidate with intensity 1,
                        // without modifying the list handed back by the model.
                        tokens.Add(new List<Token> { new Token(w, "S_O", 1) });
                        continue;
                    }

                    tokens.Add(pred.Select(p => new Token(w, (p.label.Contains("_") ? p.label : "S_" + p.label), p.intensity)).ToList());
                }

                var graph = new SentenceGraph();

                foreach (var candidates in tokens)
                {
                    graph.AddVertexRange(candidates);
                }

                for (int i = 0; i < (words.Count - 1); i++)
                {
                    double maxIntensityC = tokens[i].Max(p => p.Intensity);
                    double maxIntensityN = tokens[i + 1].Max(p => p.Intensity);

                    foreach (var source in tokens[i])
                    {
                        string clabel = withTagPrefix(source.Label);

                        foreach (var dest in tokens[i + 1])
                        {
                            string nlabel = withTagPrefix(dest.Label);

                            if (!SentenceGraph.IsTransitionAllowed(clabel, nlabel))
                            {
                                continue;
                            }

                            // Intensities are normalised by the per-word maximum. A negative intensity or a
                            // non-positive maximum gives probability 0; dividing by a zero maximum would
                            // otherwise produce NaN.
                            bool usable = maxIntensityC > 0 && maxIntensityN > 0
                                          && source.Intensity >= 0 && dest.Intensity >= 0;

                            double probability = usable
                                ? (source.Intensity / maxIntensityC) * (dest.Intensity / maxIntensityN)
                                : 0;

                            graph.AddEdge(new SentenceEdge(source, dest, clabel, nlabel, probability));
                        }
                    }
                }

                var BoS = new Token("__BEGIN__", "", 1);
                var EoS = new Token("__END__", "", 1);

                graph.AddVertex(BoS);
                graph.AddVertex(EoS);

                // Connect the begin marker to every candidate of the first word that may follow "S_O".
                foreach (var t in tokens.First())
                {
                    string tlabel = withTagPrefix(t.Label);
                    if (SentenceGraph.IsTransitionAllowed("S_O", tlabel))
                    {
                        graph.AddEdge(new SentenceEdge(BoS, t, "S_O", tlabel, 1.0));
                    }
                }

                // Connect every candidate of the last word that may precede "S_O" to the end marker.
                foreach (var t in tokens.Last())
                {
                    string tlabel = withTagPrefix(t.Label);
                    if (SentenceGraph.IsTransitionAllowed(tlabel, "S_O"))
                    {
                        graph.AddEdge(new SentenceEdge(t, EoS, tlabel, "S_O", 1.0));
                    }
                }

                paths = graph.GetAllPossibleSentenceInterpretations(BoS, EoS, numberOfPossibilities);

                return paths;
            }