Beispiel #1
0
        // Interactive entity-tagging demo.
        // Loads a fastText model from a hard-coded path, then for every line read from standard
        // input asks FastTextNER.SentenceGraph.IdentifyEntities for sentence interpretations
        // (passing 5 as the number of possibilities) and prints each one with its probability.
        // The loop ends when standard input is exhausted.
        //
        // args: command-line arguments (not used).
        static void Main3(string[] args)
        {
            //Ensures that we are consistent against culture-specific number formatting, etc...
            CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");

            CultureInfo.DefaultThreadCurrentCulture   = culture;
            CultureInfo.DefaultThreadCurrentUICulture = culture;

            Thread.CurrentThread.CurrentCulture   = culture;
            Thread.CurrentThread.CurrentUICulture = culture;

            string model         = @"C:\IW\VS\PROJECTS\BOSON\Boson.Testing.CreateLuisApp\bin\x64\Debug\training\ner-caseinsensitive-fasttext";
            var    fastTextModel = new fastText(model + ".bin");

            while (true)
            {
                var text = Console.ReadLine();

                // Console.ReadLine returns null once no more input is available (EOF / closed pipe).
                // Without this check the loop would never terminate and null would be handed to
                // IdentifyEntities.
                if (text == null)
                {
                    break;
                }

                var paths = FastTextNER.SentenceGraph.IdentifyEntities(fastTextModel, text, 5);

                foreach (var path in paths)
                {
                    // Every token is rendered as "value[label]"; tokens and nodes are separated by single spaces.
                    var taggedSentence = string.Join(" ", path.Sentence.Select(n => string.Join(" ", n.Tokens.Select(t => t.Value + "[" + t.Label + "]"))));

                    Console.WriteLine("Probability: " + path.Probability.ToString("0.00") + "\t" + taggedSentence);
                }
            }

            // Reachable now that the loop can end; call the wrapper's static Release before exiting.
            fastText.Release();
        }
Beispiel #2
0
        // Evaluates the "cheeseDisease" supervised model against the samples returned by GetTestData().
        // For every sample the top prediction is requested, the "__label__" prefix and any remaining
        // "__" are stripped from its label, and the result is compared with the expected label.
        // A summary of correct / mislabeled / unlabeled samples is printed at the end.
        // If the model file does not exist, training instructions are printed and the method returns.
        //
        // args: command-line arguments (not used).
        static void Main2(string[] args)
        {
            //Ensures that we are consistent against culture-specific number formatting, etc...
            CultureInfo culture = CultureInfo.CreateSpecificCulture("en-US");

            CultureInfo.DefaultThreadCurrentCulture   = culture;
            CultureInfo.DefaultThreadCurrentUICulture = culture;

            Thread.CurrentThread.CurrentCulture   = culture;
            Thread.CurrentThread.CurrentUICulture = culture;

            if (!File.Exists(@"cheeseDisease.bin"))
            {
                Console.WriteLine("Please train the model first using the console version of fastText and the data supplied in the SampleData folder");
                Console.WriteLine("fastText.exe supervised -input cheeseDisease.txt -output cheeseDisease");
                return;
            }

            Console.WriteLine("Loading model, please wait");
            var fastTextModel = new fastText(@"cheeseDisease.bin");

            Console.WriteLine("... done!");

            var tests = GetTestData();
            int correct = 0, fail = 0, noLabel = 0;

            foreach (var test in tests)
            {
                var predictions = fastTextModel.GetPrediction(test.Text, 1);

                // An empty prediction list is reported as "n/a" instead of letting First() throw.
                // The earlier retry on "n/a" repeated the exact same call, so it has been dropped.
                var label = predictions.Any()
                    ? predictions.First().label.Replace("__label__", "").Replace("__", "")
                    : "n/a";

                Console.WriteLine($"{test.Text} -> P:{label} / C:{test.Label}");

                if (label == test.Label)
                {
                    correct++;
                }
                else
                {
                    fail++;
                }

                // Unlabeled samples are tracked separately so the summary can subtract them from the failures.
                if (label == "n/a")
                {
                    noLabel++;
                }
            }

            Console.WriteLine($"Summary: {correct} correctly labeled, {fail-noLabel} mislabed, {noLabel} no labels found");
            Console.WriteLine("Press any key to finish!");
            Console.Read();

            fastText.Release();
        }
Beispiel #3
0
        // Interactive word-similarity explorer.
        // Loads "<model>.bin" (the model base path defaults to a hard-coded location and can be
        // overridden by the first command-line argument), then repeatedly reads a word from standard
        // input and prints the 20 most similar and 5 least similar entries reported by the model.
        // The loop ends when standard input is exhausted.
        //
        // args: optional; args[0] is the model path without the ".bin" extension.
        static void Main(string[] args)
        {
            string model = @"C:\BigData\NLPmodels\FastText\aviation-caseinsensitive";

            if (args.Length > 0)
            {
                model = args[0];
            }

            var fastTextModel = new fastText(model + @".bin");

            // NOTE(review): the result is not used below. The call is kept because it is not visible
            // from here whether GetWords() has side effects the similarity queries rely on - confirm and remove.
            var words = fastTextModel.GetWords();

            while (true)
            {
                Console.Write("\nWord: ");
                var w1 = Console.ReadLine();

                // Console.ReadLine returns null once no more input is available (EOF / closed pipe).
                // Without this check the loop would spin forever and Release() below could never run.
                if (w1 == null)
                {
                    break;
                }

                Console.WriteLine("\nMost similar:");
                fastTextModel.GetMostSimilar(w1, 20).ForEach(ws => Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00")));

                Console.WriteLine("\nLeast similar:");
                fastTextModel.GetLeastSimilar(w1, 5).ForEach(ws => Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00")));
            }

            fastText.Release();
        }
Beispiel #4
0
            // Builds a graph of candidate labels for every word of a sentence and returns the
            // interpretations found between an artificial begin and end token.
            //
            // Steps:
            //   1. Split text on spaces (empty entries removed).
            //   2. Ask the model for 5 predictions per word; a word without predictions gets a single
            //      "S_O" candidate with intensity 1.
            //   3. Add every candidate as a vertex and connect candidates of adjacent words whenever
            //      SentenceGraph.IsTransitionAllowed accepts the label pair. The edge probability is the
            //      product of both intensities, each divided by the highest intensity of its own word.
            //   4. Connect the begin/end tokens to the first/last word's candidates with probability 1.
            //
            // fastTextModel:          model used for the per-word predictions.
            // text:                   sentence to analyse; null is treated like an empty sentence.
            // numberOfPossibilities:  forwarded unchanged to GetAllPossibleSentenceInterpretations.
            //
            // Returns the list produced by GetAllPossibleSentenceInterpretations, or an empty list when
            // text is null or contains no words.
            public static List <SentenceInterpretation> IdentifyEntities(fastText fastTextModel, string text, int numberOfPossibilities)
            {
                var paths = new List <SentenceInterpretation>();

                // Console.ReadLine-style callers can hand over null; treat it like an empty sentence.
                if (text == null)
                {
                    return(paths);
                }

                var words = text.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList(); //IMPROVE TOKENIZATION HERE

                if (words.Count == 0)
                {
                    return(paths);
                }

                // tokens[i] holds all candidate labelings of words[i].
                var tokens = new List <List <Token> >();

                foreach (var w in words)
                {
                    var pred = fastTextModel.GetPrediction(w, 5);

                    if (pred.Count == 0)
                    {
                        pred.Add(new prediction()
                        {
                            label = "S_O", intensity = 1
                        });
                    }

                    // NOTE(review): the literal "S_" is used here while the edge code uses
                    // SentenceGraph.SingleTag + "_"; presumably both are the same prefix - confirm.
                    tokens.Add(pred.Select(p => new Token(w, (p.label.Contains("_") ? p.label : "S_" + p.label), p.intensity)).ToList());
                }

                var graph = new SentenceGraph();

                tokens.ForEach(t => graph.AddVertexRange(t));

                for (int i = 0; i < (words.Count - 1); i++)
                {
                    var current = tokens[i];
                    var next    = tokens[i + 1];

                    double maxIntensityC = current.Max(p => p.Intensity);
                    double maxIntensityN = next.Max(p => p.Intensity);

                    // Dividing by a non-positive maximum would yield NaN (0 / 0) or a meaningless sign,
                    // so such columns contribute probability 0 instead.
                    bool canNormalize = maxIntensityC > 0 && maxIntensityN > 0;

                    foreach (var source in current)
                    {
                        // Depends only on the source token, so it is computed once per source.
                        string clabel = EnsureTagPrefix(source.Label);

                        foreach (var dest in next)
                        {
                            string nlabel = EnsureTagPrefix(dest.Label);

                            if (!SentenceGraph.IsTransitionAllowed(clabel, nlabel))
                            {
                                continue;
                            }

                            double probability = 0;
                            if (canNormalize && source.Intensity >= 0 && dest.Intensity >= 0)
                            {
                                probability = (source.Intensity / maxIntensityC) * (dest.Intensity / maxIntensityN);
                            }

                            graph.AddEdge(new SentenceEdge(source, dest, clabel, nlabel, probability));
                        }
                    }
                }

                // Artificial sentence boundaries; they are added after the word vertices and word-to-word edges.
                var BoS = new Token("__BEGIN__", "", 1);
                var EoS = new Token("__END__", "", 1);

                graph.AddVertex(BoS); graph.AddVertex(EoS);

                // The boundaries are treated as carrying the label "S_O" when checking transitions.
                foreach (var t in tokens[0])
                {
                    string tlabel = EnsureTagPrefix(t.Label);
                    if (SentenceGraph.IsTransitionAllowed("S_O", tlabel))
                    {
                        graph.AddEdge(new SentenceEdge(BoS, t, "S_O", tlabel, 1.0));
                    }
                }

                foreach (var t in tokens[tokens.Count - 1])
                {
                    string tlabel = EnsureTagPrefix(t.Label);
                    if (SentenceGraph.IsTransitionAllowed(tlabel, "S_O"))
                    {
                        graph.AddEdge(new SentenceEdge(t, EoS, tlabel, "S_O", 1.0));
                    }
                }

                paths = graph.GetAllPossibleSentenceInterpretations(BoS, EoS, numberOfPossibilities);

                return(paths);
            }

            // Returns label unchanged when it already contains '_'; otherwise prepends
            // SentenceGraph.SingleTag followed by '_'.
            private static string EnsureTagPrefix(string label)
            {
                return label.Contains("_") ? label : SentenceGraph.SingleTag + "_" + label;
            }