/**
 * <summary>Trains a <see cref="NeuralNetwork"/> on the given corpus using a freshly created
 * <see cref="WordToVecParameter"/> whose CBow setting is taken from the argument.</summary>
 * <param name="corpus">Corpus handed to the neural network.</param>
 * <param name="cBow">Value forwarded to <c>SetCbow</c> on the parameter object.</param>
 * <returns>The dictionary produced by <c>NeuralNetwork.Train()</c>.</returns>
 */
private VectorizedDictionary Train(Corpus.Corpus corpus, bool cBow)
{
    var settings = new WordToVecParameter();
    settings.SetCbow(cBow);

    // The network is only needed for this single training run, so it is not kept in a local.
    return new NeuralNetwork(corpus, settings).Train();
}
/**
 * <summary>Creates a <see cref="NeuralNetwork"/> for the given corpus and parameters. A
 * <see cref="Vocabulary"/> is built from the corpus, the word vector matrix is created with one row per
 * vocabulary entry and one column per layer unit using the bounds -0.5 and 0.5 and a new random generator,
 * a second matrix of the same dimensions is allocated for vector updates, and the exp table is prepared.</summary>
 * <param name="corpus">Corpus used to train word vectors using Word2Vec algorithm.</param>
 * <param name="parameter">Parameters of the Word2Vec algorithm.</param>
 */
public NeuralNetwork(Corpus.Corpus corpus, WordToVecParameter parameter)
{
    _corpus = corpus;
    _parameter = parameter;
    _vocabulary = new Vocabulary(corpus);

    // Both matrices share the same shape: vocabulary size x layer size.
    var rowCount = _vocabulary.Size();
    var columnCount = parameter.GetLayerSize();

    _wordVectors = new Matrix(rowCount, columnCount, -0.5, 0.5, new Random());
    _wordVectorUpdate = new Matrix(rowCount, columnCount);

    PrepareExpTable();
}
/**
 * <summary>Runs the given converter over every entry of the tree bank, in index order, and collects the
 * converted results as sentences of a newly created corpus.</summary>
 * <param name="constituencyToDependencyTreeConverter">Converter applied to each tree bank entry; it is
 * called with the entry and <c>null</c> as its second argument.</param>
 * <returns>A new corpus holding one sentence per tree bank entry.</returns>
 */
public Corpus.Corpus Convert(IConstituencyToDependencyTreeConverter constituencyToDependencyTreeConverter)
{
    var result = new Corpus.Corpus();
    var index = 0;

    // The size is re-read on every pass, exactly as a for-loop condition would do.
    while (index < _treeBank.Size())
    {
        var tree = _treeBank.Get(index);
        var sentence = constituencyToDependencyTreeConverter.Convert(tree, null);
        result.AddSentence(sentence);
        index++;
    }

    return result;
}
/**
 * <summary>Creates a <see cref="Vocabulary"/> from the given corpus. One <see cref="VocabularyWord"/> is
 * added for every entry of the corpus word list, built from the word's name and its count in the corpus.
 * The list is then sorted with the default ordering of <see cref="VocabularyWord"/> (by number of
 * occurrences, according to the original documentation), the unigram table is created and the Huffman tree
 * is constructed. Finally the list is sorted again using a <see cref="TurkishWordComparator"/>.</summary>
 * <param name="corpus">Corpus used to train word vectors using Word2Vec algorithm.</param>
 */
public Vocabulary(Corpus.Corpus corpus)
{
    _vocabulary = new List<VocabularyWord>();

    foreach (var word in corpus.GetWordList())
    {
        var name = word.GetName();
        var occurrences = corpus.GetCount(word);
        _vocabulary.Add(new VocabularyWord(name, occurrences));
    }

    // The unigram table and the Huffman tree are built while the list is in its default sort order.
    _vocabulary.Sort();
    CreateUniGramTable();
    ConstructHuffmanTree();

    // Only after both structures exist is the list re-ordered with the Turkish word comparator.
    _vocabulary.Sort(new TurkishWordComparator());
}
public void Setup()
{
    // Relative locations of the two similarity dataset files.
    const string englishDatasetPath = "../../../english-similarity-dataset.txt";
    const string turkishDatasetPath = "../../../turkish-similarity-dataset.txt";

    // Build one corpus per dataset file and keep them in the fixture fields.
    english = new Corpus.Corpus(englishDatasetPath);
    turkish = new Corpus.Corpus(turkishDatasetPath);
}
public void Setup()
{
    // Relative locations of the two corpus files.
    const string corpusPath = "../../../corpus.txt";
    const string simpleCorpusPath = "../../../simplecorpus.txt";

    // Build one corpus per file and keep them in the fixture fields.
    corpus = new Corpus.Corpus(corpusPath);
    simpleCorpus = new Corpus.Corpus(simpleCorpusPath);
}