Пример #1
0
        /// <summary>
        /// Creates an <c>LRVocab</c> from <c>VOCAB_PATH</c>, logs two vocabulary statistics and
        /// then logs the word attached to the summed representation of a sample phrase.
        /// </summary>
        /// <returns>A task representing the (synchronously executed) run.</returns>
        /// <remarks>
        /// The body contains no awaits; <c>async</c> is kept so that any exception is still
        /// delivered through the returned task instead of being thrown to the caller directly.
        /// </remarks>
        public async Task Test()
        {
            var vocab = new LRVocab().Create(VOCAB_PATH, (x) => log.Invoke(x));

            // Nothing to report when the vocabulary did not become ready.
            if (!vocab.IsReady)
            {
                return;
            }

            log($"Words count: {vocab.Vocabulary.Words}");
            log($"Size: {vocab.Vocabulary.VectorDimensionsCount}");

            var phraseSum = vocab.Vocabulary.GetSummRepresentationOrNullForPhrase("An unusual galaxy far, far away is stumping astronomers not because of what’s there, but because of what’s missing");

            // The lookup is an "OrNull" API: use a null-conditional access so a missing
            // representation logs an empty value instead of throwing NullReferenceException.
            log($"Analogy: {phraseSum?.WordOrNull}");
        }
Пример #2
0
        /// <summary>
        /// Builds training samples from a sliding window of four consecutive vocabulary words,
        /// trains <c>SNeuralNet</c> on them and logs the network output for every sample.
        /// </summary>
        /// <remarks>
        /// For each window the six inputs are the metric lengths of the first three words and of
        /// three summed sub-phrases (words 0-1, words 1-2, words 0-2); the expected output is the
        /// metric length of the fourth word.
        /// </remarks>
        /// <param name="log">Callback that receives progress and result messages.</param>
        public void TestWords(Log log)
        {
            var net     = new SNeuralNet(6, 2, 300, 1);
            var wordBag = WordBag.CreateToWords(string.Join(". ", Configuration.RawDataList), 1);

            var trainSets = new List<Tuple<double[], double[]>>();

            var wordsHistory = new List<string>();
            var vocab        = new LRVocab().Create(Configuration.VocabularyPath, (string s) => log(s));

            // Do not touch vocab.Vocabulary unless the vocabulary reports itself as ready.
            if (!vocab.IsReady)
            {
                log("Vocabulary is not ready");
                return;
            }

            log("Prepare tests list");

            foreach (var word in wordBag.Read())
            {
                var w = word[0];

                // Words unknown to the vocabulary are skipped and do not enter the window.
                if (!vocab.Vocabulary.ContainsWord(w))
                {
                    continue;
                }

                // Maintain a sliding window of the last four known words.
                wordsHistory.Add(w);
                if (wordsHistory.Count < 4)
                {
                    continue;
                }
                if (wordsHistory.Count > 4)
                {
                    wordsHistory.RemoveAt(0);
                }

                var input = new double[]
                {
                    vocab.Vocabulary.GetRepresentationOrNullFor(wordsHistory[0]).MetricLength,
                    vocab.Vocabulary.GetRepresentationOrNullFor(wordsHistory[1]).MetricLength,
                    vocab.Vocabulary.GetRepresentationOrNullFor(wordsHistory[2]).MetricLength,

                    // Summed sub-phrases fall back to 0 when no representation is available.
                    vocab.Vocabulary.GetSummRepresentationOrNullForPhrase(wordsHistory.Take(2).ToArray())?.MetricLength ?? 0d,
                    vocab.Vocabulary.GetSummRepresentationOrNullForPhrase(wordsHistory.Skip(1).Take(2).ToArray())?.MetricLength ?? 0d,
                    vocab.Vocabulary.GetSummRepresentationOrNullForPhrase(wordsHistory.Take(3).ToArray())?.MetricLength ?? 0d,
                };

                // Target: metric length of the fourth (newest) word in the window.
                var correct = new double[] { vocab.Vocabulary.GetRepresentationFor(wordsHistory[3]).MetricLength };

                trainSets.Add(new Tuple<double[], double[]>(input, correct));
            }

            if (trainSets.Count == 0)
            {
                log("No train sets");
                return;
            }

            log($"Train sets count: {trainSets.Count}");
            log($"Train starts");

            var trainer = new NeuralNetTrainer()
                          .SetDataSets(trainSets.ToArray())
                          .SetNet(net);

            trainer.EpochsCount = 150;
            trainer.LearnRate   = 0.001;

            trainer.SimpleTrain();
            log("Train end");

            // NOTE(review): the result of net.Activate is interpolated as-is; if it returns an
            // array this prints the type name rather than the values — confirm against SNeuralNet.
            foreach (var set in trainSets)
            {
                log($"Input: {set.Item1[0]} {set.Item1[1]} {set.Item1[2]}\t Result: {net.Activate(set.Item1)}");
            }
        }
Пример #3
0
        /// <summary>
        /// For every entry of <c>Phrases</c>, logs the representations obtained in two ways:
        /// by summing word triples produced by <c>WordBag.CreateToWords</c>, and by looking up
        /// the items produced by <c>WordBag.CreateToPhrases</c>.
        /// </summary>
        /// <param name="log">Callback that receives all diagnostic output.</param>
        public void Test(Log log)
        {
            var voc = new LRVocab().Create("D:/vectors/google_vokab.bin", (message) => log(message));

            // Do not touch voc.Vocabulary unless the vocabulary reports itself as ready.
            if (!voc.IsReady)
            {
                log("Vocabulary is not ready");
                return;
            }

            foreach (var p in Phrases)
            {
                var wb = WordBag.CreateToWords(p, 3);
                var pb = WordBag.CreateToPhrases(p, 1);

                log($"Process phrase '{p}'");

                var wordsResults   = new List<Representation>();
                var phrasesResults = new List<Representation>();

                log("ToWords representation");

                foreach (var data in wb.Read())
                {
                    Representation[] parts =
                    {
                        voc.Vocabulary.GetSummRepresentationOrNullForPhrase(data[0]),
                        voc.Vocabulary.GetSummRepresentationOrNullForPhrase(data[1]),
                        voc.Vocabulary.GetSummRepresentationOrNullForPhrase(data[2]),
                    };

                    // Fold the non-null parts into the first non-null one.
                    // NOTE(review): Add is called on the instance returned by the vocabulary;
                    // if that instance is shared/cached this mutates it — confirm.
                    Representation result = null;
                    foreach (var part in parts)
                    {
                        if (part == null)
                        {
                            continue;
                        }

                        if (result == null)
                        {
                            result = part;
                        }
                        else
                        {
                            result.Add(part);
                        }
                    }

                    // None of the three items had a representation.
                    if (result == null)
                    {
                        continue;
                    }

                    log($"{data[0]} + {data[1]} + {data[2]} => {result.WordOrNull} ({result.MetricLength})");

                    if (!string.IsNullOrWhiteSpace(result.WordOrNull))
                    {
                        wordsResults.Add(result);
                    }
                }

                log("words results:");
                log(string.Join(" ", wordsResults.Select(x => x.WordOrNull)));
                log("");

                log("phrases represuntation");
                foreach (var data in pb.Read())
                {
                    var result = voc.Vocabulary.GetSummRepresentationOrNullForPhrase(data);
                    if (result == null)
                    {
                        continue;
                    }

                    log($"{data[0]} => {result.WordOrNull} ({result.MetricLength})");
                    if (!string.IsNullOrWhiteSpace(result.WordOrNull))
                    {
                        phrasesResults.Add(result);
                    }
                }

                log("phrase results:");
                log(string.Join(" ", phrasesResults.Select(x => x.WordOrNull)));
                log("");
                log("------");
            }
        }