Пример #1
0
 /**
  * <summary>Initialises a new <see cref="Iteration"/> from a corpus and a Word2Vec parameter set. Both arguments
  * are stored on the instance, and the starting and current alpha values are each read from
  * <c>wordToVecParameter.GetAlpha()</c>.</summary>
  * <param name="corpus">Corpus used to train word vectors using Word2Vec algorithm.</param>
  * <param name="wordToVecParameter">Parameters of the Word2Vec algorithm; supplies the initial alpha.</param>
  */
 public Iteration(Corpus.Corpus corpus, WordToVecParameter wordToVecParameter)
 {
     // The parameter shadows the field here, so the explicit `this.` is required.
     this.corpus = corpus;
     _wordToVecParameter = wordToVecParameter;

     // Starting and current alpha both begin at the configured alpha value.
     _startingAlpha = wordToVecParameter.GetAlpha();
     _alpha = wordToVecParameter.GetAlpha();
 }
        public void TestDeasciify()
        {
            // Round-trip check over corpus.txt: for every word (from the second word of each
            // sentence onward) that has at least one morphological analysis and whose asciified
            // form differs from the original, deasciify the two-word sentence
            // "<previous word> <current word>" and expect the second word back unchanged.
            var analyzer      = new FsmMorphologicalAnalyzer();
            var languageModel = new NGram<string>("../../../ngram.txt");

            languageModel.CalculateNGramProbabilities(new NoSmoothing<string>());
            var deasciifier = new NGramDeasciifier(analyzer, languageModel, true);
            var asciifier   = new SimpleAsciifier();
            var testCorpus  = new Corpus.Corpus("../../../corpus.txt");

            for (var sentenceIndex = 0; sentenceIndex < testCorpus.SentenceCount(); sentenceIndex++)
            {
                var current = testCorpus.GetSentence(sentenceIndex);

                // Start at 1: each checked word needs its predecessor as context.
                for (var wordIndex = 1; wordIndex < current.WordCount(); wordIndex++)
                {
                    var word     = current.GetWord(wordIndex);
                    var expected = word.GetName();

                    // Skip words the analyzer produces no analysis for.
                    if (analyzer.MorphologicalAnalysis(expected).Size() <= 0)
                    {
                        continue;
                    }

                    // Skip words that asciification leaves unchanged.
                    if (asciifier.Asciify(word).Equals(expected))
                    {
                        continue;
                    }

                    var context  = current.GetWord(wordIndex - 1).GetName() + " " + expected;
                    var restored = deasciifier.Deasciify(new Sentence(context));
                    Assert.AreEqual(expected, restored.GetWord(1).GetName());
                }
            }
        }
        /// <summary>
        /// Builds a corpus by converting every tree in <c>parseTrees</c> to a string with a
        /// <c>TreeToStringConverter</c>. Each non-empty result is added as a sentence; for an
        /// empty result a "not translated" message naming the tree is written to the console.
        /// </summary>
        /// <param name="leafToLanguageConverter">Converter passed to each <c>TreeToStringConverter</c>.</param>
        /// <returns>The corpus holding one sentence per tree that produced a non-empty string.</returns>
        public Corpus.Corpus CreateCorpus(LeafToLanguageConverter leafToLanguageConverter)
        {
            var result = new Corpus.Corpus();

            foreach (var item in parseTrees)
            {
                var drawable = (ParseTreeDrawable)item;
                string text = new TreeToStringConverter(drawable, leafToLanguageConverter).Convert();

                if (text == "")
                {
                    // Nothing to add for this tree; report it and move on.
                    Console.WriteLine("Parse Tree " + drawable.GetName() + " is not translated");
                    continue;
                }

                result.AddSentence(new Sentence(text));
            }

            return result;
        }