/// <summary>
/// Builds the bi-gram at the current cursor position from three consecutive
/// tokens of <c>split</c>, then advances the cursor by one token.
/// </summary>
/// <returns>The bi-gram starting at the current cursor position.</returns>
public BiGram Next()
{
    // Window of three consecutive tokens: (First, Second) -> Suffix.
    var result = new BiGram
    {
        First = split[pointer],
        Second = split[pointer + 1],
        Suffix = split[pointer + 2]
    };

    // Slide the window forward by a single token for the next call.
    pointer += 1;
    return result;
}
/// <summary>
/// Train the model on a given corpus
/// </summary>
/// <param name="path">Path to the corpus database</param>
public void Train(string path)
{
    EmbeddingCorpus corpus = new EmbeddingCorpus(path);

    // Pre-add all words
    // First pass: register every word so Vocabulary entries exist before training.
    foreach (List <string> sentence in corpus.Sentences)
    {
        foreach (string word in sentence)
        {
            Add(word);
        }
    }

    // Learning
    // Second pass: skip-gram-style training over bi-grams of each sentence.
    foreach (List <string> sentence in corpus.Sentences)
    {
        // Drop words selected by sub-sampling before forming bi-grams.
        // (corpus.SubSampling(w) == true appears to mean "discard w" — TODO confirm.)
        var _sentence = new List <String>(sentence.Where(w => !corpus.SubSampling(w)));
        var _grams = BiGram.FromSentence(_sentence);

        foreach (var gram in _grams)
        {
            // NOTE(review): this re-randomizes the output vector for gram.Right on
            // EVERY occurrence of the gram, discarding anything previously learned
            // for that word's output weights. Typical word2vec initializes output
            // vectors once — verify this reset is intentional.
            Output[gram.Right] = Vectors.Random(EmbeddingSize, UniformRandom.Distribution).Map(p => p * 0.01);

            // Positive sample
            UpdateSample(gram.Left, gram.Right, true);

            // Update negative samples
            // Candidate negatives are drawn from the whole vocabulary; the center
            // word itself is excluded, and NegativeSampling filters the rest
            // (presumably probabilistically — TODO confirm its contract).
            foreach (string word in Vocabulary.Keys)
            {
                if (word == gram.Left || !corpus.NegativeSampling(word))
                {
                    continue;
                }

                UpdateSample(gram.Left, word, false);
            }

            // Update hidden layer
            // Error is assumed to have been accumulated by the UpdateSample calls
            // above for this gram — TODO confirm it is reset between grams.
            for (int i = 0; i < EmbeddingSize; i++)
            {
                Vocabulary[gram.Left][i] += Error[i];
            }
        }
    }
}
/// <summary>
/// Records one observation of <paramref name="biGram"/>: increments the count
/// of its suffix under the bi-gram's hash key, creating the nested suffix
/// dictionary on first sight. Bi-grams whose first or second token is blank
/// (empty or whitespace-only) are ignored.
/// </summary>
/// <param name="biGram">The bi-gram occurrence to record.</param>
public void Add(BiGram biGram)
{
    // Same predicate as the original Trim().Length > 0 checks, without
    // allocating trimmed copies of the strings.
    if (string.IsNullOrWhiteSpace(biGram.First) || string.IsNullOrWhiteSpace(biGram.Second))
    {
        return;
    }

    // Hoist the hash: the original recomputed biGram.Hash() up to four times.
    var hash = biGram.Hash();

    // Single lookup via TryGetValue instead of ContainsKey + indexer (CA1854).
    if (this.map.TryGetValue(hash, out var suffixCounts))
    {
        if (suffixCounts.TryGetValue(biGram.Suffix, out int count))
        {
            suffixCounts[biGram.Suffix] = count + 1;
        }
        else
        {
            suffixCounts.Add(biGram.Suffix, 1);
        }
    }
    else
    {
        // First occurrence of this bi-gram key: create the suffix map with
        // an initial count of 1, as the original did.
        this.map.Add(hash, new Dictionary<string, int> { { biGram.Suffix, 1 } });
    }
}