Exemplo n.º 1
0
        /// <summary>
        /// Interactive console loop: repeatedly prompts for a word and prints the 20 most
        /// similar and 5 least similar words reported by the loaded fastText model.
        /// The loop ends when the input is blank or the input stream is exhausted, after
        /// which <c>fastText.Release()</c> is invoked.
        /// </summary>
        /// <param name="args">
        /// Optional. When present, <c>args[0]</c> replaces the default model path prefix
        /// (the ".bin" extension is appended by this method).
        /// </param>
        static void Main(string[] args)
        {
            // Default model path prefix; ".bin" is appended when the model is opened.
            string model = @"C:\BigData\NLPmodels\FastText\aviation-caseinsensitive";

            if (args.Length > 0)
            {
                model = args[0];
            }

            var fastTextModel = new fastText(model + @".bin");

            // NOTE(review): the returned word list is not used by this method. The call is
            // retained in case it has side effects the similarity queries rely on —
            // confirm against the fastText wrapper and remove if it is a pure getter.
            fastTextModel.GetWords();

            try
            {
                while (true)
                {
                    Console.Write("\nWord: ");
                    var w1 = Console.ReadLine();

                    // Console.ReadLine returns null once the input stream is exhausted
                    // (e.g. redirected/closed stdin). Treat that, or a blank line, as the
                    // signal to quit so the loop terminates and cleanup below is reachable.
                    if (string.IsNullOrWhiteSpace(w1))
                    {
                        break;
                    }

                    Console.WriteLine("\nMost similar:");
                    foreach (var ws in fastTextModel.GetMostSimilar(w1, 20))
                    {
                        Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00"));
                    }

                    Console.WriteLine("\nLeast similar:");
                    foreach (var ws in fastTextModel.GetLeastSimilar(w1, 5))
                    {
                        Console.WriteLine("\t" + ws.Item1.PadLeft(15) + " " + ws.Item2.ToString("0.00"));
                    }
                }
            }
            finally
            {
                // Previously unreachable because the loop never exited; now runs on normal
                // exit and when a query throws.
                fastText.Release();
            }
        }