/// <summary>
/// Constructor for the <see cref="Iteration"/> class. Stores the given corpus and Word2Vec
/// parameters, and initialises both the starting learning rate and the current learning rate
/// from <c>wordToVecParameter.GetAlpha()</c>.
/// </summary>
/// <param name="corpus">Corpus used to train word vectors using Word2Vec algorithm.</param>
/// <param name="wordToVecParameter">Parameters of the Word2Vec algorithm.</param>
public Iteration(Corpus.Corpus corpus, WordToVecParameter wordToVecParameter)
{
    // Both alpha fields start from the same configured value.
    _alpha = wordToVecParameter.GetAlpha();
    _startingAlpha = wordToVecParameter.GetAlpha();

    // Keep references to the inputs for later use by this instance.
    _wordToVecParameter = wordToVecParameter;
    this.corpus = corpus;
}
public void TestDeasciify()
{
    // Round-trip check: for every analysable word whose asciified form differs from the
    // original, deasciifying the pair "<previous word> <asciified-candidate word>" must
    // give back the original word in the second position.
    var analyzer = new FsmMorphologicalAnalyzer();
    var languageModel = new NGram<string>("../../../ngram.txt");
    languageModel.CalculateNGramProbabilities(new NoSmoothing<string>());
    var deasciifier = new NGramDeasciifier(analyzer, languageModel, true);
    var asciifier = new SimpleAsciifier();
    var corpus = new Corpus.Corpus("../../../corpus.txt");

    for (var sentenceIndex = 0; sentenceIndex < corpus.SentenceCount(); sentenceIndex++)
    {
        var sentence = corpus.GetSentence(sentenceIndex);

        // Start at 1 because each word is tested together with its predecessor.
        for (var wordIndex = 1; wordIndex < sentence.WordCount(); wordIndex++)
        {
            var word = sentence.GetWord(wordIndex);
            var expected = word.GetName();

            // Skip words the morphological analyzer produces no analysis for.
            if (fsmHasNoAnalysis(analyzer, expected))
            {
                continue;
            }

            // Skip words that asciification leaves unchanged.
            var asciified = asciifier.Asciify(word);
            if (asciified.Equals(expected))
            {
                continue;
            }

            var previous = sentence.GetWord(wordIndex - 1).GetName();
            var deasciified = deasciifier.Deasciify(new Sentence(previous + " " + expected));
            Assert.AreEqual(expected, deasciified.GetWord(1).GetName());
        }
    }

    // True when the analyzer returns an empty analysis list for the given surface form.
    bool fsmHasNoAnalysis(FsmMorphologicalAnalyzer fsm, string surfaceForm)
    {
        return !(fsm.MorphologicalAnalysis(surfaceForm).Size() > 0);
    }
}
/// <summary>
/// Builds a corpus from the parse trees held in <c>parseTrees</c>. Each tree is converted to a
/// string with a <c>TreeToStringConverter</c>; every non-empty result is added to the corpus as
/// a sentence, and trees that convert to the empty string are reported on the console.
/// </summary>
/// <param name="leafToLanguageConverter">Converter handed to each <c>TreeToStringConverter</c>.</param>
/// <returns>The corpus containing one sentence per tree whose conversion was not empty.</returns>
public Corpus.Corpus CreateCorpus(LeafToLanguageConverter leafToLanguageConverter)
{
    var result = new Corpus.Corpus();

    foreach (var tree in parseTrees)
    {
        var drawable = (ParseTreeDrawable)tree;
        string text = new TreeToStringConverter(drawable, leafToLanguageConverter).Convert();

        // An empty conversion is logged and the tree contributes nothing to the corpus.
        if (text == "")
        {
            Console.WriteLine("Parse Tree " + drawable.GetName() + " is not translated");
            continue;
        }

        result.AddSentence(new Sentence(text));
    }

    return result;
}