Example #1
0
        // Runs DummyDisambiguation over every sentence of the disambiguation corpus and compares the chosen
        // parse of each word with the parse stored in the corpus. Two ratios are asserted (tolerance 0.01):
        // words whose root matches (0.86) and words whose full transition list matches (0.70).
        public void TestDisambiguation()
        {
            var analyzer      = new FsmMorphologicalAnalyzer();
            var goldCorpus    = new DisambiguationCorpus("../../../penntreebank.txt");
            var disambiguator = new DummyDisambiguation();
            int parseHits = 0, rootHits = 0;

            for (var sentenceIndex = 0; sentenceIndex < goldCorpus.SentenceCount(); sentenceIndex++)
            {
                var goldSentence = goldCorpus.GetSentence(sentenceIndex);
                var predicted    = disambiguator.Disambiguate(analyzer.RobustMorphologicalAnalysis(goldSentence));

                for (var wordIndex = 0; wordIndex < goldSentence.WordCount(); wordIndex++)
                {
                    var goldWord  = (DisambiguatedWord)goldSentence.GetWord(wordIndex);
                    var candidate = predicted[wordIndex];
                    var goldParse = goldWord.GetParse();

                    // Full match: the predicted transition list equals the textual form of the corpus parse.
                    if (candidate.TransitionList().Equals(goldParse.ToString()))
                    {
                        parseHits++;
                    }

                    // Root match: only the root word has to agree.
                    if (candidate.GetWord().Equals(goldParse.GetWord()))
                    {
                        rootHits++;
                    }
                }
            }

            // Stored as double so the divisions below are floating-point.
            double totalWords = goldCorpus.NumberOfWords();
            Assert.AreEqual(0.86, rootHits / totalWords, 0.01);
            Assert.AreEqual(0.70, parseHits / totalWords, 0.01);
        }
Example #2
0
 /**
  * <summary>Constructor of VectorizedSemanticInstanceGenerator. Forwards the morphological analyzer and the WordNet to
  * the base-class constructor and stores the vectorized dictionary, the window size and the word format.</summary>
  * <param name="fsm">Morphological analyzer; passed to the base constructor.</param>
  * <param name="wordNet">WordNet; passed to the base constructor.</param>
  * <param name="dictionary">Dictionary in the vector form. Each word is stored in vector form in this dictionary.</param>
  * <param name="windowSize">Number of previous (next) words to be considered in adding attributes.</param>
  * <param name="format">Word vector format.</param>
  */
 public VectorizedSemanticInstanceGenerator(FsmMorphologicalAnalyzer fsm, WordNet.WordNet wordNet,
                                            VectorizedDictionary dictionary, int windowSize, WordFormat format) : base(fsm, wordNet)
 {
     // 'this.' is required here because the parameter has the same name as the member.
     this.windowSize = windowSize;
     _dictionary     = dictionary;
     _format         = format;
 }
        // For every corpus word (from the second word of each sentence on) that has at least one morphological
        // analysis and whose asciified form differs from the original, feeds "previousWord asciified-candidate"
        // style two-word input built from the original names to NGramDeasciifier and expects the second word
        // of the output to equal the original word.
        public void TestDeasciify()
        {
            var analyzer = new FsmMorphologicalAnalyzer();
            var model    = new NGram<string>("../../../ngram.txt");
            model.CalculateNGramProbabilities(new NoSmoothing<string>());

            var deasciifier = new NGramDeasciifier(analyzer, model, true);
            var asciifier   = new SimpleAsciifier();
            var texts       = new Corpus.Corpus("../../../corpus.txt");

            for (var sentenceIndex = 0; sentenceIndex < texts.SentenceCount(); sentenceIndex++)
            {
                var sentence = texts.GetSentence(sentenceIndex);

                // Starts at 1: every check uses the preceding word as left context.
                for (var wordIndex = 1; wordIndex < sentence.WordCount(); wordIndex++)
                {
                    var target     = sentence.GetWord(wordIndex);
                    var targetName = target.GetName();

                    // Skip words the analyzer cannot parse.
                    if (analyzer.MorphologicalAnalysis(targetName).Size() <= 0)
                    {
                        continue;
                    }

                    // Skip words that asciification leaves unchanged.
                    var plain = asciifier.Asciify(target);
                    if (plain.Equals(targetName))
                    {
                        continue;
                    }

                    var context  = new Sentence(sentence.GetWord(wordIndex - 1).GetName() + " " + targetName);
                    var restored = deasciifier.Deasciify(context);
                    Assert.AreEqual(targetName, restored.GetWord(1).GetName());
                }
            }
        }
Example #4
0
        // Round-trip check over the analyzer's dictionary. A word is tested when it contains at least one of
        // the letters ç, ö, ğ, ü, ş, ı, does not end with "fulü", and is nominal, adjective, adverb or verb.
        // Such a word is asciified; if the deasciifier offers exactly one candidate for the asciified form,
        // deasciifying it must give back the original word.
        public void TestDeasciify()
        {
            var fsm = new FsmMorphologicalAnalyzer();
            var simpleDeasciifier = new SimpleDeasciifier(fsm);
            var simpleAsciifier   = new SimpleAsciifier();
            var dictionary        = fsm.GetDictionary();

            // Letters the test looks for; only their presence matters, not how many occur.
            char[] turkishLetters = { 'ç', 'ö', 'ğ', 'ü', 'ş', 'ı' };

            for (var i = 0; i < dictionary.Size(); i++)
            {
                var word = (TxtWord)dictionary.GetWord(i);
                var name = word.GetName();

                if (name.IndexOfAny(turkishLetters) < 0)
                {
                    continue;
                }

                // Ordinal comparison: the suffix check must not depend on the machine's current culture.
                if (name.EndsWith("fulü", System.StringComparison.Ordinal))
                {
                    continue;
                }

                if (!(word.IsNominal() || word.IsAdjective() || word.IsAdverb() || word.IsVerb()))
                {
                    continue;
                }

                var asciified = simpleAsciifier.Asciify(word);

                // Only unambiguous cases are asserted; with several candidates the expected output is not fixed.
                if (simpleDeasciifier.CandidateList(new Word(asciified)).Count == 1)
                {
                    var deasciified = simpleDeasciifier.Deasciify(new Sentence(asciified)).ToString();
                    Assert.AreEqual(name, deasciified);
                }
            }
        }
Example #5
0
        // Generates a semantic data set from the ".dev" files under "../../../trees/" with a
        // FeaturedSemanticInstanceGenerator (window size 1) and checks the resulting sample,
        // class and attribute counts.
        public void TestGenerate()
        {
            var analyzer          = new FsmMorphologicalAnalyzer();
            var turkishWordNet    = new WordNet.WordNet();
            var instanceGenerator = new FeaturedSemanticInstanceGenerator(analyzer, turkishWordNet, 1);
            var generator         = new SemanticDataSetGenerator("../../../trees/", ".dev", instanceGenerator);

            var generated = generator.Generate();

            Assert.AreEqual(52, generated.SampleSize());
            Assert.AreEqual(35, generated.ClassCount());
            Assert.AreEqual(66, generated.AttributeCount());
        }
Example #6
0
        // Spell-checks three two-word sentences with an NGramSpellChecker created with its third
        // constructor argument set to false, and compares each result with the expected text.
        public void TestSpellCheckSurfaceForm()
        {
            var analyzer = new FsmMorphologicalAnalyzer();
            var model    = new NGram<string>("../../../ngram.txt");
            model.CalculateNGramProbabilities(new NoSmoothing<string>());
            var checker = new NGramSpellChecker(analyzer, model, false);

            // Each row: { expected output, misspelled input }.
            string[,] cases =
            {
                { "noter hakkında", "noter hakkınad" },
                { "arçelik'in çamaşır", "arçelik'in çamşaır" },
                { "ruhsat yanında", "ruhset yanında" }
            };

            for (var row = 0; row < cases.GetLength(0); row++)
            {
                var corrected = checker.SpellCheck(new Sentence(cases[row, 1])).ToString();
                Assert.AreEqual(cases[row, 0], corrected);
            }
        }
        // Deasciifies three two-word sentences with an NGramDeasciifier created with its third
        // constructor argument set to false, and compares each result with the expected text.
        public void TestDeasciify2()
        {
            var analyzer = new FsmMorphologicalAnalyzer();
            var model    = new NGram<string>("../../../ngram.txt");
            model.CalculateNGramProbabilities(new NoSmoothing<string>());
            var deasciifier = new NGramDeasciifier(analyzer, model, false);

            // Parallel arrays: expectedTexts[k] is the required output for asciiInputs[k].
            string[] asciiInputs   = { "noter hakkinda", "sandik medrese", "kuran'ı karsilikli" };
            string[] expectedTexts = { "noter hakkında", "sandık medrese", "kuran'ı karşılıklı" };

            for (var k = 0; k < asciiInputs.Length; k++)
            {
                var restored = deasciifier.Deasciify(new Sentence(asciiInputs[k])).ToString();
                Assert.AreEqual(expectedTexts[k], restored);
            }
        }
        // Analyses ten fixed Turkish word forms with a fresh FsmMorphologicalAnalyzer and stores the
        // first parse (index 0) of each analysis in parse1..parse10.
        // NOTE(review): parse1..parse10 are declared outside this view — presumably fixture fields
        // shared by the tests of the enclosing class; confirm against the class definition.
        public void Setup()
        {
            var fsm = new FsmMorphologicalAnalyzer();

            // Only the first parse of each analysis is kept, so tests depend on the analyzer's parse order.
            parse1  = fsm.MorphologicalAnalysis("açılır").GetFsmParse(0);
            parse2  = fsm.MorphologicalAnalysis("koparılarak").GetFsmParse(0);
            parse3  = fsm.MorphologicalAnalysis("toplama").GetFsmParse(0);
            parse4  = fsm.MorphologicalAnalysis("değerlendirmede").GetFsmParse(0);
            parse5  = fsm.MorphologicalAnalysis("soruşturmasının").GetFsmParse(0);
            parse6  = fsm.MorphologicalAnalysis("karşılaştırmalı").GetFsmParse(0);
            parse7  = fsm.MorphologicalAnalysis("esaslarını").GetFsmParse(0);
            parse8  = fsm.MorphologicalAnalysis("güçleriyle").GetFsmParse(0);
            parse9  = fsm.MorphologicalAnalysis("bulmayacakları").GetFsmParse(0);
            parse10 = fsm.MorphologicalAnalysis("mü").GetFsmParse(0);
        }
Example #9
0
        // Spell-checks a list of deliberately corrupted sentences with an NGramSpellChecker (third constructor
        // argument true) and expects each result to equal the textual form of the matching clean sentence.
        // The last pair is the empty sentence.
        public void TestSpellCheck()
        {
            string[] cleanTexts =
            {
                "demokratik cumhuriyet en kıymetli varlığımızdır",
                "bu tablodaki değerler zedelenmeyecektir",
                "milliyet'in geleneksel yılın sporcusu anketi 43. yaşını doldurdu",
                "demokrasinin icadı bu ayrımı bulandırdı",
                "dışişleri müsteşarı Öymen'in 1997'nin ilk aylarında Bağdat'a gitmesi öngörülüyor",
                "büyüdü , palazlandı , devleti ele geçirdi",
                "her maskenin ciltte kalma süresi farklıdır",
                "yılın son ayında 10 gazeteci gözaltına alındı",
                "iki pilotun kullandığı uçakta bir hostes görev alıyor",
                "son derece kısıtlı kelimeler çerçevesinde kendilerini uzun cümlelerle ifade edebiliyorlar",
                "kedi köpek",
                "minibüs durağı",
                "noter belgesi",
                ""
            };
            string[] noisyTexts =
            {
                "demokratik cumhüriyet rn kımetli varlıgımızdır",
                "bu tblodaki değerlğr zedelenmeyecüktir",
                "milliyet'in geeneksel yılin spoşcusu ankşti 43. yeşını doldürdu",
                "demokrasinin icşdı buf ayrmıı bulandürdı",
                "dışişleri mütseşarı Öymen'in 1997'nin iljk aylğrında Bağdat'a gitmesi öngşrülüyor",
                "büyüdü , palazandı , devltei eöe geçridi",
                "her makenin cültte aklma sürdsi farlkıdır",
                "yılın sno ayında 10 gazteci gözlatına alündı",
                "iki piotun kulçandığı uçkata üir hotes görçv alyıor",
                "son deece kısütlı keilmeler çeçevesinde kendülerini uzuü cümllerle ifüde edbeiliyorlar",
                "krdi köpek",
                "minibü durağı",
                "ntoer belgesi",
                ""
            };

            // Sentences are built up front, clean ones first, before the analyzer and the model are loaded.
            var expected = new Sentence[cleanTexts.Length];
            for (var k = 0; k < cleanTexts.Length; k++)
            {
                expected[k] = new Sentence(cleanTexts[k]);
            }

            var corrupted = new Sentence[noisyTexts.Length];
            for (var k = 0; k < noisyTexts.Length; k++)
            {
                corrupted[k] = new Sentence(noisyTexts[k]);
            }

            var analyzer = new FsmMorphologicalAnalyzer();
            var model    = new NGram<string>("../../../ngram.txt");
            model.CalculateNGramProbabilities(new NoSmoothing<string>());
            var checker = new NGramSpellChecker(analyzer, model, true);

            for (var k = 0; k < corrupted.Length; k++)
            {
                var repaired = checker.SpellCheck(corrupted[k]).ToString();
                Assert.AreEqual(expected[k].ToString(), repaired);
            }
        }
        // Reads "../../../misspellings.txt" line by line. Each line is split on a single space; the first
        // token is the misspelled word and the second the expected correction. SimpleSpellChecker must turn
        // the former into the latter.
        public void TestSpellCheck()
        {
            var fsm = new FsmMorphologicalAnalyzer();
            var simpleSpellChecker = new SimpleSpellChecker(fsm);

            // 'using' disposes the reader even when an assertion throws part-way through the file;
            // the previous explicit Close() was skipped in that case and leaked the file handle.
            using (var input = new StreamReader("../../../misspellings.txt"))
            {
                string line;
                while ((line = input.ReadLine()) != null)
                {
                    var items      = line.Split(" ");
                    var misspelled = items[0];
                    var corrected  = items[1];
                    Assert.AreEqual(corrected, simpleSpellChecker.SpellCheck(new Sentence(misspelled)).ToString());
                }
            }
        }
Example #11
0
 /**
  * <summary>Creates an n-gram based deasciifier. The morphological analyzer is forwarded to the base-class
  * constructor; the n-gram model and the root flag are stored on this instance.</summary>
  *
  * <param name="fsm">Morphological analyzer passed to the base constructor.</param>
  * <param name="nGram">N-gram model stored in the nGram member.</param>
  * <param name="rootNGram">Stored in the rootNGram member; true if the NGram is root nGram.</param>
  */
 public NGramDeasciifier(FsmMorphologicalAnalyzer fsm, NGram <string> nGram, bool rootNGram) : base(fsm)
 {
     // 'this.' is required: both parameters share their names with the members they initialise.
     this.rootNGram = rootNGram;
     this.nGram     = nGram;
 }
Example #12
0
 /**
  * <summary>Stores the WordNet and the morphological analyzer this instance works with.</summary>
  * <param name="turkishWordNet">WordNet kept in the _turkishWordNet member.</param>
  * <param name="fsm">Morphological analyzer kept in the _fsm member.</param>
  */
 public TurkishTreeAutoSemantic(WordNet.WordNet turkishWordNet, FsmMorphologicalAnalyzer fsm)
 {
     _fsm            = fsm;
     _turkishWordNet = turkishWordNet;
 }
 /**
  * <summary>Creates a SimpleSpellChecker that keeps the given morphological analyzer in its fsm member.</summary>
  *
  * <param name="fsm">Morphological analyzer to store.</param>
  */
 public SimpleSpellChecker(FsmMorphologicalAnalyzer fsm) => this.fsm = fsm;
Example #14
0
        /**
         * <summary>Collects the candidate synsets for the leaf at <paramref name="index"/>. The list starts with the
         * result of ConstructSynSets for the leaf's own first morphological parse and is then extended with the
         * results of ConstructIdiomSynSets for the three-word windows (two previous + current, previous + current +
         * next, current + two next) and the two-word windows (previous + current, current + next). A window is
         * only tried when all of its neighbouring leaves exist and have a non-null first morphological parse.</summary>
         * <param name="wordNet">WordNet on which ConstructSynSets and ConstructIdiomSynSets are called.</param>
         * <param name="fsm">Morphological analyzer passed through to those WordNet calls.</param>
         * <param name="leafList">Leaves whose layer information is inspected.</param>
         * <param name="index">Position of the current leaf in <paramref name="leafList"/>.</param>
         * <returns>The synset list for the current leaf followed by every idiom synset that was found.</returns>
         */
        protected List <SynSet> GetCandidateSynSets(WordNet.WordNet wordNet, FsmMorphologicalAnalyzer fsm,
                                                    List <ParseNodeDrawable> leafList, int index)
        {
            LayerInfo current = leafList[index].GetLayerInfo();

            // Neighbours that fall outside the list stay null; the '?.' checks below then skip their windows.
            LayerInfo twoPrevious = index > 1 ? leafList[index - 2].GetLayerInfo() : null;
            LayerInfo previous    = index > 0 ? leafList[index - 1].GetLayerInfo() : null;
            LayerInfo next        = index != leafList.Count - 1 ? leafList[index + 1].GetLayerInfo() : null;
            LayerInfo twoNext     = index < leafList.Count - 2 ? leafList[index + 2].GetLayerInfo() : null;

            // The current leaf's parses take part in every window, so they are fetched once.
            var currentParse     = current.GetMorphologicalParseAt(0);
            var currentRoot      = currentParse.GetWord().GetName();
            var currentMetaParse = current.GetMetamorphicParseAt(0);

            List <SynSet> synSets = wordNet.ConstructSynSets(currentRoot, currentParse, currentMetaParse, fsm);

            // Three-word windows.
            if (twoPrevious?.GetMorphologicalParseAt(0) != null && previous?.GetMorphologicalParseAt(0) != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(twoPrevious.GetMorphologicalParseAt(0),
                                                               previous.GetMorphologicalParseAt(0), currentParse,
                                                               twoPrevious.GetMetamorphicParseAt(0), previous.GetMetamorphicParseAt(0),
                                                               currentMetaParse, fsm));
            }

            if (previous?.GetMorphologicalParseAt(0) != null && next?.GetMorphologicalParseAt(0) != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previous.GetMorphologicalParseAt(0),
                                                               currentParse, next.GetMorphologicalParseAt(0),
                                                               previous.GetMetamorphicParseAt(0), currentMetaParse,
                                                               next.GetMetamorphicParseAt(0), fsm));
            }

            if (next?.GetMorphologicalParseAt(0) != null && twoNext?.GetMorphologicalParseAt(0) != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse,
                                                               next.GetMorphologicalParseAt(0), twoNext.GetMorphologicalParseAt(0),
                                                               currentMetaParse, next.GetMetamorphicParseAt(0),
                                                               twoNext.GetMetamorphicParseAt(0), fsm));
            }

            // Two-word windows.
            if (previous?.GetMorphologicalParseAt(0) != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previous.GetMorphologicalParseAt(0),
                                                               currentParse,
                                                               previous.GetMetamorphicParseAt(0), currentMetaParse, fsm));
            }

            if (next?.GetMorphologicalParseAt(0) != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse,
                                                               next.GetMorphologicalParseAt(0),
                                                               currentMetaParse, next.GetMetamorphicParseAt(0), fsm));
            }

            return synSets;
        }
Example #15
0
 /**
  * <summary>Stores the given morphological analyzer in the morphologicalAnalyzer member. The constructor is
  * protected, so it can only be invoked from derived classes.</summary>
  * <param name="morphologicalAnalyzer">Morphological analyzer to store.</param>
  */
 protected TreeAutoDisambiguator(FsmMorphologicalAnalyzer morphologicalAnalyzer)
     => this.morphologicalAnalyzer = morphologicalAnalyzer;
        /**
         * <summary>Collects the candidate synsets for the word at <paramref name="index"/>. The list starts with the
         * result of ConstructSynSets for the word's own parse and is then extended with the results of
         * ConstructIdiomSynSets for the three-word windows (two previous + current, previous + current + next,
         * current + two next) and the two-word windows (previous + current, current + next). A window is only
         * tried when all of its neighbouring words exist and have a non-null parse.</summary>
         * <param name="wordNet">WordNet on which ConstructSynSets and ConstructIdiomSynSets are called.</param>
         * <param name="fsm">Morphological analyzer passed through to those WordNet calls.</param>
         * <param name="sentence">Sentence whose words are inspected; each word is cast to AnnotatedWord.</param>
         * <param name="index">Position of the current word in <paramref name="sentence"/>.</param>
         * <returns>The synset list for the current word followed by every idiom synset that was found.</returns>
         */
        protected List <SynSet> GetCandidateSynSets(WordNet.WordNet wordNet, FsmMorphologicalAnalyzer fsm,
                                                    AnnotatedSentence.AnnotatedSentence sentence, int index)
        {
            AnnotatedWord current = (AnnotatedWord)sentence.GetWord(index);

            // Neighbours that fall outside the sentence stay null; the '?.' checks below then skip their windows.
            AnnotatedWord twoPrevious = index > 1 ? (AnnotatedWord)sentence.GetWord(index - 2) : null;
            AnnotatedWord previous    = index > 0 ? (AnnotatedWord)sentence.GetWord(index - 1) : null;
            AnnotatedWord next        = index != sentence.WordCount() - 1 ? (AnnotatedWord)sentence.GetWord(index + 1) : null;
            AnnotatedWord twoNext     = index < sentence.WordCount() - 2 ? (AnnotatedWord)sentence.GetWord(index + 2) : null;

            // The current word's parses take part in every window, so they are fetched once.
            var currentParse     = current.GetParse();
            var currentRoot      = currentParse.GetWord().GetName();
            var currentMetaParse = current.GetMetamorphicParse();

            List <SynSet> synSets = wordNet.ConstructSynSets(currentRoot, currentParse, currentMetaParse, fsm);

            // Three-word windows.
            if (twoPrevious?.GetParse() != null && previous?.GetParse() != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(twoPrevious.GetParse(), previous.GetParse(),
                                                               currentParse,
                                                               twoPrevious.GetMetamorphicParse(), previous.GetMetamorphicParse(), currentMetaParse,
                                                               fsm));
            }

            if (previous?.GetParse() != null && next?.GetParse() != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previous.GetParse(), currentParse, next.GetParse(),
                                                               previous.GetMetamorphicParse(), currentMetaParse, next.GetMetamorphicParse(), fsm));
            }

            if (next?.GetParse() != null && twoNext?.GetParse() != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse, next.GetParse(), twoNext.GetParse(),
                                                               currentMetaParse, next.GetMetamorphicParse(), twoNext.GetMetamorphicParse(), fsm));
            }

            // Two-word windows.
            if (previous?.GetParse() != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previous.GetParse(), currentParse,
                                                               previous.GetMetamorphicParse(), currentMetaParse, fsm));
            }

            if (next?.GetParse() != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse, next.GetParse(),
                                                               currentMetaParse, next.GetMetamorphicParse(), fsm));
            }

            return synSets;
        }
Example #17
0
 /**
  * <summary>Stores the WordNet and the morphological analyzer this instance works with.</summary>
  * <param name="turkishWordNet">WordNet kept in the _turkishWordNet member.</param>
  * <param name="fsm">Morphological analyzer kept in the _fsm member.</param>
  */
 public Lesk(WordNet.WordNet turkishWordNet, FsmMorphologicalAnalyzer fsm)
 {
     _fsm            = fsm;
     _turkishWordNet = turkishWordNet;
 }
Example #18
0
 /**
  * <summary>Constructor method. Forwards the morphological analyzer and the WordNet to the base-class constructor
  * and stores the window size.</summary>
  * <param name="fsm">Morphological analyzer to be used.</param>
  * <param name="wordNet">Wordnet to be used.</param>
  * <param name="windowSize">Number of previous (next) words to be considered in adding attributes.</param>
  */
 public FeaturedSemanticInstanceGenerator(FsmMorphologicalAnalyzer fsm, WordNet.WordNet wordNet, int windowSize)
     : base(fsm, wordNet) => this.windowSize = windowSize;
 // Replaces the fsm member with a freshly constructed FsmMorphologicalAnalyzer.
 public void Setup() => fsm = new FsmMorphologicalAnalyzer();
Example #20
0
 /**
  * <summary>Creates an n-gram based spell checker. The morphological analyzer is forwarded to the base-class
  * constructor; the n-gram model and the root flag are stored on this instance.</summary>
  *
  * <param name="fsm">Morphological analyzer passed to the base constructor.</param>
  * <param name="nGram">N-gram model stored in the _nGram member.</param>
  * <param name="rootNgram">Flag stored in the _rootNgram member. NOTE(review): presumably true when the n-gram
  * was built over root forms rather than surface forms; confirm where _rootNgram is read.</param>
  */
 public NGramSpellChecker(FsmMorphologicalAnalyzer fsm, NGram <string> nGram, bool rootNgram) : base(fsm)
 {
     _rootNgram = rootNgram;
     _nGram     = nGram;
 }
 /**
  * <summary> Constructor for the class. Forwards the morphological analyzer to the base-class constructor and does
  * nothing else.</summary>
  * <param name="fsm">Finite State Machine based morphological analyzer</param>
  */
 public TurkishSentenceAutoDisambiguator(FsmMorphologicalAnalyzer fsm) : base(fsm)
 {
 }
Example #22
0
 /**
  * <summary>Creates a SimpleDeasciifier that keeps the given morphological analyzer in its fsm member.</summary>
  *
  * <param name="fsm">Morphological analyzer to store.</param>
  */
 public SimpleDeasciifier(FsmMorphologicalAnalyzer fsm) => this.fsm = fsm;
 /**
  * <summary>Stores the WordNet and the morphological analyzer this instance works with.</summary>
  * <param name="turkishWordNet">WordNet kept in the _turkishWordNet member.</param>
  * <param name="fsm">Morphological analyzer kept in the _fsm member.</param>
  */
 public RandomSentenceAutoSemantic(WordNet.WordNet turkishWordNet, FsmMorphologicalAnalyzer fsm)
 {
     _fsm            = fsm;
     _turkishWordNet = turkishWordNet;
 }
 // Creates a fresh FsmMorphologicalAnalyzer and a default-constructed WordNet and stores them in the
 // fsm and wordNet members.
 // NOTE(review): both members are declared outside this view — presumably fixture fields that the
 // test framework re-initialises through this method; confirm against the class definition.
 public void Setup()
 {
     fsm     = new FsmMorphologicalAnalyzer();
     wordNet = new WordNet.WordNet();
 }
Example #25
0
 /**
  * <summary>Constructor for the semantic instance generator. Stores the morphological analyzer and the WordNet in
  * the corresponding members.</summary>
  * <param name="fsm">Morphological analyzer to be used.</param>
  * <param name="wordNet">Wordnet to be used.</param>
  */
 public SemanticInstanceGenerator(FsmMorphologicalAnalyzer fsm, WordNet.WordNet wordNet)
 {
     _wordNet = wordNet;
     _fsm     = fsm;
 }