Пример #1
0
        /**
         * <summary>Builds a default <see cref="WordToVecParameter"/>, applies the requested CBOW setting to it,
         * and trains a <see cref="NeuralNetwork"/> on the given corpus.</summary>
         * <param name="corpus">Corpus handed to the neural network for training.</param>
         * <param name="cBow">Value forwarded to <c>SetCbow</c> on the parameter object.</param>
         * <returns>The dictionary returned by <c>NeuralNetwork.Train()</c>.</returns>
         */
        private VectorizedDictionary Train(Corpus.Corpus corpus, bool cBow)
        {
            var settings = new WordToVecParameter();
            settings.SetCbow(cBow);

            return new NeuralNetwork(corpus, settings).Train();
        }
 /**
  * <summary>Creates a <see cref="NeuralNetwork"/> for the given corpus and Word2Vec parameters. The vocabulary
  * is built from the corpus first, because its size determines the row count of both matrices. The word
  * vector matrix is then filled with random values in the range -0.5 to 0.5, the update matrix is allocated
  * with the same dimensions, and finally the exp table is prepared.</summary>
  * <param name="corpus">Corpus used to train word vectors using the Word2Vec algorithm.</param>
  * <param name="parameter">Parameters of the Word2Vec algorithm; supplies the layer size.</param>
  */
 public NeuralNetwork(Corpus.Corpus corpus, WordToVecParameter parameter)
 {
     _corpus     = corpus;
     _parameter  = parameter;
     _vocabulary = new Vocabulary(corpus);

     // One row per vocabulary word, one column per hidden-layer unit.
     var rowCount    = _vocabulary.Size();
     var columnCount = parameter.GetLayerSize();

     _wordVectors      = new Matrix(rowCount, columnCount, -0.5, 0.5, new Random());
     _wordVectorUpdate = new Matrix(rowCount, columnCount);

     PrepareExpTable();
 }
Пример #3
0
        /**
         * <summary>Converts every tree in the tree bank with the supplied converter and collects the results,
         * in tree bank order, into a new corpus.</summary>
         * <param name="constituencyToDependencyTreeConverter">Converter applied to each tree; it is called
         * with the tree and <c>null</c> as its second argument.</param>
         * <returns>A new corpus holding one converted sentence per tree in the tree bank.</returns>
         */
        public Corpus.Corpus Convert(IConstituencyToDependencyTreeConverter constituencyToDependencyTreeConverter)
        {
            var result = new Corpus.Corpus();

            for (var treeIndex = 0; treeIndex < _treeBank.Size(); treeIndex++)
            {
                var tree     = _treeBank.Get(treeIndex);
                var sentence = constituencyToDependencyTreeConverter.Convert(tree, null);

                result.AddSentence(sentence);
            }

            return result;
        }
Пример #4
0
        /**
         * <summary>Creates a <see cref="Vocabulary"/> from the given corpus. A <see cref="VocabularyWord"/> is
         * created for every entry of the corpus word list, together with its count in the corpus. The words are
         * then sorted with their default ordering, the unigram table and the Huffman tree are built, and
         * finally the list is re-sorted with a <see cref="TurkishWordComparator"/>.</summary>
         * <param name="corpus">Corpus used to train word vectors using the Word2Vec algorithm.</param>
         */
        public Vocabulary(Corpus.Corpus corpus)
        {
            _vocabulary = new List<VocabularyWord>();

            var distinctWords = corpus.GetWordList();
            foreach (var entry in distinctWords)
            {
                var name  = entry.GetName();
                var count = corpus.GetCount(entry);

                _vocabulary.Add(new VocabularyWord(name, count));
            }

            // Default ordering of VocabularyWord; per the original documentation this is by occurrence.
            // The unigram table and the Huffman tree are built while the list is in this order.
            _vocabulary.Sort();
            CreateUniGramTable();
            ConstructHuffmanTree();

            // Final ordering uses the Turkish word comparator (presumably to support lookup by word - confirm).
            _vocabulary.Sort(new TurkishWordComparator());
        }
Пример #5
0
 /**
  * <summary>Loads the English and Turkish similarity datasets into the <c>english</c> and <c>turkish</c>
  * fields. Paths are relative, so they resolve against the process working directory.</summary>
  */
 public void Setup()
 {
     const string englishDatasetPath = "../../../english-similarity-dataset.txt";
     const string turkishDatasetPath = "../../../turkish-similarity-dataset.txt";

     english = new Corpus.Corpus(englishDatasetPath);
     turkish = new Corpus.Corpus(turkishDatasetPath);
 }
Пример #6
0
 /**
  * <summary>Loads the two corpus files into the <c>corpus</c> and <c>simpleCorpus</c> fields. Paths are
  * relative, so they resolve against the process working directory.</summary>
  */
 public void Setup()
 {
     const string corpusPath       = "../../../corpus.txt";
     const string simpleCorpusPath = "../../../simplecorpus.txt";

     corpus       = new Corpus.Corpus(corpusPath);
     simpleCorpus = new Corpus.Corpus(simpleCorpusPath);
 }