Exemple #1
0
    /// <summary>
    /// Builds a <c>BiGram</c> from the three consecutive entries of <c>split</c>
    /// starting at <c>pointer</c>, then advances <c>pointer</c> by one.
    /// </summary>
    /// <returns>
    /// A <c>BiGram</c> whose <c>First</c>, <c>Second</c> and <c>Suffix</c> are taken from
    /// <c>split[pointer]</c>, <c>split[pointer + 1]</c> and <c>split[pointer + 2]</c>.
    /// </returns>
    public BiGram Next()
    {
        // No bounds check is performed here: the indexer on split decides what
        // happens when fewer than three entries remain from pointer onwards.
        var window = new BiGram
        {
            First  = split[pointer],
            Second = split[pointer + 1],
            Suffix = split[pointer + 2],
        };

        // Slide by a single position so consecutive results overlap by two entries.
        pointer += 1;
        return window;
    }
Exemple #2
0
        /// <summary>
        /// Trains the model on the corpus loaded from <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// Makes two passes over <c>corpus.Sentences</c>. The first pass calls <c>Add</c> for
        /// every word. The second pass drops the words for which <c>corpus.SubSampling</c>
        /// returns true, turns what is left into grams via <c>BiGram.FromSentence</c>, and for
        /// each gram performs one positive update, a set of negative updates and finally adds
        /// <c>Error</c> to the vocabulary vector of <c>gram.Left</c>.
        /// </remarks>
        /// <param name="path">Path passed to the <c>EmbeddingCorpus</c> constructor.</param>
        public void Train(string path)
        {
            var corpus = new EmbeddingCorpus(path);

            // Pass 1: register every word before any learning starts.
            foreach (List<string> tokens in corpus.Sentences)
            {
                foreach (string token in tokens)
                {
                    Add(token);
                }
            }

            // Pass 2: learning.
            foreach (List<string> tokens in corpus.Sentences)
            {
                // Keep only the words that sub-sampling does not reject.
                List<string> kept = tokens.Where(w => !corpus.SubSampling(w)).ToList();

                foreach (var gram in BiGram.FromSentence(kept))
                {
                    // NOTE(review): the output vector of gram.Right is replaced with a fresh
                    // random vector (scaled by 0.01) on every gram, not only the first time
                    // the word is seen - confirm this is intended.
                    Output[gram.Right] = Vectors.Random(EmbeddingSize, UniformRandom.Distribution).Map(p => p * 0.01);

                    // Positive sample.
                    UpdateSample(gram.Left, gram.Right, true);

                    // Negative samples: every vocabulary word other than gram.Left that
                    // corpus.NegativeSampling selects.
                    foreach (string candidate in Vocabulary.Keys)
                    {
                        if (candidate != gram.Left && corpus.NegativeSampling(candidate))
                        {
                            UpdateSample(gram.Left, candidate, false);
                        }
                    }

                    // Hidden layer: add Error component-wise to the vector of gram.Left.
                    // Presumably Error is filled in by the UpdateSample calls above - verify.
                    for (int dim = 0; dim < EmbeddingSize; dim++)
                    {
                        Vocabulary[gram.Left][dim] += Error[dim];
                    }
                }
            }
        }
Exemple #3
0
 /// <summary>
 /// Records one occurrence of <c>biGram.Suffix</c> following the pair identified by
 /// <c>biGram.Hash()</c>.
 /// </summary>
 /// <remarks>
 /// Grams whose <c>First</c> or <c>Second</c> is null, empty or only white space are
 /// ignored. The hash is computed once per call and each dictionary is probed once.
 /// </remarks>
 /// <param name="biGram">The gram to count.</param>
 public void Add(BiGram biGram)
 {
     // Nothing to count when either word of the pair is blank.
     if (string.IsNullOrWhiteSpace(biGram.First) || string.IsNullOrWhiteSpace(biGram.Second))
     {
         return;
     }

     // Hash once: it keys the outer map and was previously recomputed for every lookup.
     var key = biGram.Hash();

     // Fetch the suffix counts for this pair, creating the table on first sight.
     if (!this.map.TryGetValue(key, out var suffixCounts))
     {
         suffixCounts = new Dictionary <string, int>();
         this.map.Add(key, suffixCounts);
     }

     // A missing suffix leaves 'seen' at 0, so first and repeat sightings share one path.
     suffixCounts.TryGetValue(biGram.Suffix, out var seen);
     suffixCounts[biGram.Suffix] = seen + 1;
 }