Example #1
0
        /**
         * <summary>Constructs a <see cref="SyllableList"/> by splitting the given word into syllables.
         * The word is scanned from left to right while the characters of the syllable under construction are
         * collected in a buffer.
         * A vowel completes the buffered syllable. If that vowel is the second-to-last character of the word and the
         * final character is not a vowel, the final character is attached to the same syllable (e.g. "ki-tap");
         * a word-final vowel always forms a syllable of its own (e.g. "du-a"), so that no syllable ever
         * contains two vowels.
         * A consonant is buffered as the beginning of the next syllable. If the buffer already consists of a single
         * consonant and at least one syllable has been produced, the buffered consonant is moved to the end of the
         * previous syllable, because a syllable should not start with two consonants (e.g. "türk-çe"); when the
         * current consonant is the last character of the word it is moved there as well (e.g. "park").
         * When no syllable has been produced yet, the consonants simply accumulate in the buffer (e.g. "tren").
         * Characters still left in the buffer when the scan ends (a word without any vowel) are not emitted.</summary>
         *
         * <param name="word">The word to be divided into syllables.</param>
         */
        public SyllableList(string word)
        {
            _syllables = new List<Syllable>();

            // Characters of the syllable that is currently being assembled.
            string sbSyllable = "";

            for (int i = 0; i < word.Length; i++)
            {
                char c          = word[i];
                bool isLastChar = i == word.Length - 1;
                if (TurkishLanguage.IsVowel(c))
                {
                    sbSyllable += c;
                    // The vowel is followed by exactly one more character. Attach that character to this
                    // syllable only when it is not a vowel; a trailing vowel must start its own syllable,
                    // otherwise the syllable would contain two vowels (e.g. "dua" would not be split).
                    if (i == word.Length - 2 && !TurkishLanguage.IsVowel(word[i + 1]))
                    {
                        sbSyllable += word[i + 1];
                        // Skip the character that has just been consumed.
                        i++;
                    }

                    _syllables.Add(new Syllable(sbSyllable));
                    sbSyllable = "";
                }
                else
                {
                    // A syllable should not start with two consonants: if the buffer holds exactly one
                    // consonant, hand it over to the previous syllable (when there is one).
                    if (sbSyllable.Length == 1 && !TurkishLanguage.IsVowel(sbSyllable[0]) && _syllables.Count > 0)
                    {
                        var lastPos = _syllables.Count - 1;
                        var str     = _syllables[lastPos].GetText() + sbSyllable;
                        if (isLastChar)
                        {
                            // If the last char is also a consonant, add it to latest syllable. Ex: 'park'.
                            str += c;
                        }

                        // Update previous syllable.
                        _syllables[lastPos] = new Syllable(str);
                        sbSyllable          = "";
                    }

                    // The current consonant opens (or extends) the next syllable's onset.
                    sbSyllable += c;
                }
            }
        }
        /**
         * <summary>The ResolveSh method resolves the 'Ş' meta character against the end of the given formation.
         * When the formation ends with a vowel, 'ş' is appended. When it ends with 't', that final 't' is replaced
         * by 'd'. In every other case the formation is returned unchanged.</summary>
         *
         * <param name="formation">The surface form built so far; it must contain at least one character.</param>
         * <returns>The formation after the 'Ş' meta character has been resolved.</returns>
         */
        public static string ResolveSh(string formation)
        {
            char ending = formation[formation.Length - 1];

            if (TurkishLanguage.IsVowel(ending))
            {
                return formation + 'ş';
            }

            // Only a final 't' is altered (t -> d); any other consonant leaves the formation as it is.
            return ending == 't'
                ? formation.Substring(0, formation.Length - 1) + 'd'
                : formation;
        }
Example #3
0
        /**
         * <summary>The StartWithVowelOrConsonantDrops method inspects the beginning of the suffix template (_with).
         * It returns true when the first character is accepted by TurkishLanguage.IsConsonantDrop and the template
         * is none of "ylA", "ysA", "ymHs", "yDH" and "yken". It also returns true when the first character is the
         * meta character 'A' or 'H', or any vowel.</summary>
         *
         * <returns>true if the suffix starts with a vowel or with a consonant that drops, false otherwise.</returns>
         */
        private bool StartWithVowelOrConsonantDrops()
        {
            bool startsWithDroppingConsonant =
                TurkishLanguage.IsConsonantDrop(WithFirstChar()) &&
                !(_with == "ylA" || _with == "ysA" || _with == "ymHs" || _with == "yDH" || _with == "yken");

            if (startsWithDroppingConsonant)
            {
                return true;
            }

            // 'A' and 'H' are the vowel meta characters of the suffix template.
            return WithFirstChar() == 'A' || WithFirstChar() == 'H' || TurkishLanguage.IsVowel(WithFirstChar());
        }
Example #4
0
        /**
         * <summary>The LastVowel method scans the given stem from its end towards its beginning and returns the
         * first vowel it meets, i.e. the last vowel of the stem. If the stem contains no vowel, the last character
         * in the range '0'..'9' is returned instead. If the stem contains neither, '0' is returned.</summary>
         *
         * <param name="stem">The stem to be inspected.</param>
         * <returns>The last vowel, otherwise the last digit, otherwise '0'.</returns>
         */
        public static char LastVowel(string stem)
        {
            // First pass: look for a vowel, starting from the end.
            int pos = stem.Length;
            while (--pos >= 0)
            {
                char candidate = stem[pos];
                if (TurkishLanguage.IsVowel(candidate))
                {
                    return candidate;
                }
            }

            // Second pass: no vowel was found, fall back to the last digit.
            pos = stem.Length;
            while (--pos >= 0)
            {
                char candidate = stem[pos];
                if ('0' <= candidate && candidate <= '9')
                {
                    return candidate;
                }
            }

            return '0';
        }
Example #5
0
        /**
         * <summary>The BeforeLastVowel method scans the given stem from its end towards its beginning and returns
         * the second vowel it meets, i.e. the vowel preceding the last vowel. If the stem contains only one vowel,
         * that vowel is returned. If the stem contains no vowel at all, '0' is returned.</summary>
         *
         * <param name="stem">The stem to be inspected.</param>
         * <returns>The vowel before the last vowel, otherwise the only vowel, otherwise '0'.</returns>
         */
        public static char BeforeLastVowel(string stem)
        {
            bool lastVowelSeen = false;
            char fallback      = '0';

            for (int pos = stem.Length - 1; pos >= 0; pos--)
            {
                char candidate = stem[pos];
                if (!TurkishLanguage.IsVowel(candidate))
                {
                    continue;
                }

                if (lastVowelSeen)
                {
                    // This is the second vowel counted from the end.
                    return candidate;
                }

                // Remember the last vowel in case no earlier vowel exists.
                fallback      = candidate;
                lastVowelSeen = true;
            }

            return fallback;
        }
Example #6
0
        /**
         * <summary>The MakeTransition method attaches the suffix template of this transition (_with) to the given
         * stem and returns the resulting surface form. It works in four steps: (1) a few irregular forms are
         * returned directly; (2) when the stem is the bare root, root-specific alternations (e->i change, vowel
         * drop, 'y' insertion, consonant doubling, consonant softening) are applied to the stem; (3) the index of
         * the first template character to be processed is determined, which may skip a leading apostrophe and/or a
         * dropping consonant; (4) the remaining template characters are processed one by one, the meta characters
         * D, A, H, C, S and Ş being resolved by MorphotacticEngine and every other character being copied.
         * As a side effect the field _formationToCheck is overwritten; it holds the form that is passed to the
         * MorphotacticEngine resolvers.</summary>
         *
         * <param name="root">Root word whose flags select the alternations to apply.</param>
         * <param name="stem">Current surface form to which the suffix is attached.</param>
         * <param name="startState">State the transition starts from; its name is compared with "VerbalRoot" and
         * "ProperRoot".</param>
         * <returns>The surface form after the suffix has been attached.</returns>
         */
        public string MakeTransition(TxtWord root, string stem, State startState)
        {
            // True when the stem is still the unmodified root, optionally followed by an apostrophe.
            var rootWord  = root.GetName() == stem || root.GetName() + "'" == stem;
            var formation = stem;
            // Index of the first character of _with that the resolution loop at the end will process.
            var i         = 0;

            // The template "0" leaves the stem untouched.
            if (_with == "0")
            {
                return(stem);
            }

            // Irregular form: the roots bu, şu and o combined with the template "ylA".
            if ((stem.Equals("bu") || stem.Equals("şu") || stem.Equals("o")) && rootWord &&
                _with == "ylA")
            {
                return(stem + "nunla");
            }

            // Irregular forms: ben and sen combined with the template "yA".
            if (_with == "yA")
            {
                if (stem.Equals("ben"))
                {
                    return("bana");
                }

                if (stem.Equals("sen"))
                {
                    return("sana");
                }
            }

            _formationToCheck = stem;
            //---vowelEChangesToIDuringYSuffixation---
            //de->d(i)yor, ye->y(i)yor
            if (rootWord && WithFirstChar() == 'y' && root.VowelEChangesToIDuringYSuffixation() &&
                (_with[1] != 'H' || root.GetName() == "ye"))
            {
                formation         = stem.Substring(0, stem.Length - 1) + 'i';
                _formationToCheck = formation;
            }
            else
            {
                //---lastIdropsDuringPassiveSuffixation---
                // yoğur->yoğrul, ayır->ayrıl, buyur->buyrul, çağır->çağrıl, çevir->çevril, devir->devril,
                // kavur->kavrul, kayır->kayrıl, kıvır->kıvrıl, savur->savrul, sıyır->sıyrıl, yoğur->yoğrul
                if (rootWord && (_with == "Hl" || _with == "Hn") && root.LastIdropsDuringPassiveSuffixation())
                {
                    // Remove the second-to-last character; the unchanged stem is kept as the form to check.
                    formation         = stem.Substring(0, stem.Length - 2) + stem[stem.Length - 1];
                    _formationToCheck = stem;
                }
                else
                {
                    //---showsSuRegularities---
                    //karasu->karasuyu, özsu->özsuyu, ağırsu->ağırsuyu, akarsu->akarsuyu, bengisu->bengisuyu
                    if (rootWord && root.ShowsSuRegularities() && StartWithVowelOrConsonantDrops() &&
                        !_with.StartsWith("y"))
                    {
                        formation         = stem + 'y';
                        _formationToCheck = formation;
                    }
                    else
                    {
                        if (rootWord && root.DuplicatesDuringSuffixation() &&
                            !startState.GetName().StartsWith("VerbalRoot") &&
                            TurkishLanguage.IsConsonantDrop(_with[0]))
                        {
                            //---duplicatesDuringSuffixation---
                            if (SoftenDuringSuffixation(root))
                            {
                                //--extra softenDuringSuffixation
                                switch (Word.LastPhoneme(stem))
                                {
                                case 'p':
                                    //tıp->tıbbı
                                    formation = stem.Substring(0, stem.Length - 1) + "bb";
                                    break;

                                case 't':
                                    //cet->ceddi, met->meddi, ret->reddi, serhat->serhaddi, zıt->zıddı, şet->şeddi
                                    formation = stem.Substring(0, stem.Length - 1) + "dd";
                                    break;
                                }
                            }
                            else
                            {
                                //cer->cerri, emrihak->emrihakkı, fek->fekki, fen->fenni, had->haddi, hat->hattı,
                                // haz->hazzı, his->hissi
                                formation = stem + stem[stem.Length - 1];
                            }

                            _formationToCheck = formation;
                        }
                        else
                        {
                            if (rootWord && root.LastIdropsDuringSuffixation() &&
                                !startState.GetName().StartsWith("VerbalRoot") &&
                                !startState.GetName().StartsWith("ProperRoot") && StartWithVowelOrConsonantDrops())
                            {
                                //---lastIdropsDuringSuffixation---
                                if (SoftenDuringSuffixation(root))
                                {
                                    //---softenDuringSuffixation---
                                    switch (Word.LastPhoneme(stem))
                                    {
                                    case 'p':
                                        //hizip->hizbi, kayıp->kaybı, kayıt->kaydı, kutup->kutbu
                                        formation = stem.Substring(0, stem.Length - 2) + 'b';
                                        break;

                                    case 't':
                                        //akit->akdi, ahit->ahdi, lahit->lahdi, nakit->nakdi, vecit->vecdi
                                        formation = stem.Substring(0, stem.Length - 2) + 'd';
                                        break;

                                    case 'ç':
                                        //eviç->evci, nesiç->nesci
                                        formation = stem.Substring(0, stem.Length - 2) + 'c';
                                        break;
                                    }
                                }
                                else
                                {
                                    //sarıağız->sarıağzı, zehir->zehri, zikir->zikri, nutuk->nutku, omuz->omzu, ömür->ömrü
                                    //lütuf->lütfu, metin->metni, kavim->kavmi, kasıt->kastı
                                    formation = stem.Substring(0, stem.Length - 2) + stem[stem.Length - 1];
                                }

                                // The unchanged stem (with its vowel still present) is kept as the form to check.
                                _formationToCheck = stem;
                            }
                            else
                            {
                                // No special root class applies: only the final consonant may soften.
                                switch (Word.LastPhoneme(stem))
                                {
                                //---nounSoftenDuringSuffixation or verbSoftenDuringSuffixation
                                case 'p':
                                    //adap->adabı, amip->amibi, azap->azabı, gazap->gazabı
                                    if (StartWithVowelOrConsonantDrops() && rootWord &&
                                        SoftenDuringSuffixation(root))
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'b';
                                    }

                                    break;

                                case 't':
                                    //abat->abadı, adet->adedi, akort->akordu, armut->armudu
                                    //affet->affedi, yoket->yokedi, sabret->sabredi, rakset->raksedi
                                    if (StartWithVowelOrConsonantDrops() && rootWord &&
                                        SoftenDuringSuffixation(root))
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'd';
                                    }

                                    break;

                                case 'ç':
                                    //ağaç->ağacı, almaç->almacı, akaç->akacı, avuç->avucu
                                    if (StartWithVowelOrConsonantDrops() && rootWord &&
                                        SoftenDuringSuffixation(root))
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'c';
                                    }

                                    break;

                                case 'g':
                                    //arkeolog->arkeoloğu, filolog->filoloğu, minerolog->mineroloğu
                                    if (StartWithVowelOrConsonantDrops() && rootWord &&
                                        SoftenDuringSuffixation(root))
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'ğ';
                                    }

                                    break;

                                case 'k':
                                    //ahenk->ahengi, künk->küngü, renk->rengi, pelesenk->pelesengi
                                    if (StartWithVowelOrConsonantDrops() && rootWord &&
                                        root.EndingKChangesIntoG() && !root.IsProperNoun())
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'g';
                                    }
                                    else
                                    {
                                        //ablak->ablağı, küllük->küllüğü, kitaplık->kitaplığı, evcilik->evciliği
                                        // Unlike the other cases, k->ğ is also applied to stems that are not the bare root.
                                        if (StartWithVowelOrConsonantDrops() &&
                                            (!rootWord ||
                                             (SoftenDuringSuffixation(root) &&
                                              (!root.IsProperNoun() ||
                                               !startState.ToString().Equals("ProperRoot")))))
                                        {
                                            formation = stem.Substring(0, stem.Length - 1) + 'ğ';
                                        }
                                    }

                                    break;
                                }

                                _formationToCheck = formation;
                            }
                        }
                    }
                }
            }

            // Decide where processing of the template starts. For numeric-like roots whose name ends with one of
            // the listed digit strings, a leading dropping consonant of the suffix is skipped when the stem does
            // not end with a vowel (presumably because the spoken form of these numbers ends with a consonant).
            if (TurkishLanguage.IsConsonantDrop(WithFirstChar()) &&
                !TurkishLanguage.IsVowel(stem[stem.Length - 1]) &&
                (root.IsNumeral() || root.IsReal() || root.IsFraction() || root.IsTime() || root.IsDate() ||
                 root.IsPercent() || root.IsRange()) && (root.GetName().EndsWith("1") || root.GetName().EndsWith("3") ||
                                                         root.GetName().EndsWith("4") || root.GetName().EndsWith("5") ||
                                                         root.GetName().EndsWith("8") || root.GetName().EndsWith("9") ||
                                                         root.GetName().EndsWith("10") ||
                                                         root.GetName().EndsWith("30") ||
                                                         root.GetName().EndsWith("40") ||
                                                         root.GetName().EndsWith("60") ||
                                                         root.GetName().EndsWith("70") ||
                                                         root.GetName().EndsWith("80") ||
                                                         root.GetName().EndsWith("90") ||
                                                         root.GetName().EndsWith("00")))
            {
                if (_with[0] == '\'')
                {
                    // Copy the apostrophe, then skip both the apostrophe and the dropping consonant.
                    formation += '\'';
                    i          = 2;
                }
                else
                {
                    // Skip the dropping consonant only.
                    i = 1;
                }
            }
            else
            {
                // The dropping consonant is also skipped after a stem ending with a consonant, or for bare roots
                // flagged with ConsonantSMayInsertedDuringPossesiveSuffixation.
                if ((TurkishLanguage.IsConsonantDrop(WithFirstChar()) &&
                     TurkishLanguage.IsConsonant(Word.LastPhoneme(stem))) ||
                    (rootWord && root.ConsonantSMayInsertedDuringPossesiveSuffixation()))
                {
                    if (_with[0] == '\'')
                    {
                        formation += '\'';
                        // For abbreviations only the apostrophe is skipped; the following character is kept.
                        if (root.IsAbbreviation())
                        {
                            i = 1;
                        }
                        else
                        {
                            i = 2;
                        }
                    }
                    else
                    {
                        i = 1;
                    }
                }
            }

            // Process the remaining template characters one by one.
            for (; i < _with.Length; i++)
            {
                switch (_with[i])
                {
                case 'D':
                    formation = MorphotacticEngine.ResolveD(root, formation, _formationToCheck);
                    break;

                case 'A':
                    formation = MorphotacticEngine.ResolveA(root, formation, rootWord, _formationToCheck);
                    break;

                case 'H':
                    // The third argument tells the resolver whether 'H' is the first character of the suffix;
                    // with a leading apostrophe the suffix proper starts at index 1.
                    if (_with[0] != '\'')
                    {
                        formation = MorphotacticEngine.ResolveH(root, formation, i == 0, _with.StartsWith("Hyor"), rootWord, _formationToCheck);
                    }
                    else
                    {
                        formation = MorphotacticEngine.ResolveH(root, formation, i == 1, false, rootWord, _formationToCheck);
                    }

                    break;

                case 'C':
                    formation = MorphotacticEngine.ResolveC(formation, _formationToCheck);
                    break;

                case 'S':
                    formation = MorphotacticEngine.ResolveS(formation);
                    break;

                case 'Ş':
                    formation = MorphotacticEngine.ResolveSh(formation);
                    break;

                default:
                    // Ordinary characters are copied as they are, except that an 's' in the last position of
                    // the template is written as 'ş'.
                    if (i == _with.Length - 1 && _with[i] == 's')
                    {
                        formation += 'ş';
                    }
                    else
                    {
                        formation += _with[i];
                    }

                    break;
                }

                // The form built so far becomes the form to check for the next template character.
                _formationToCheck = formation;
            }

            return(formation);
        }
        /**
         * <summary>The ResolveH method resolves the 'H' meta character of a suffix against the given formation.
         * The checks are made in this order: (1) abbreviations always receive 'i'; (2) at the beginning of a suffix
         * other than the special tense suffix, nothing is added when the form to check ends with a vowel;
         * (3) for the special tense suffix, the last character of the formation is replaced by the high vowel that
         * matches the vowel before the last vowel, either because the root word is flagged with
         * VowelAChangesToIDuringYSuffixation or because the form to check ends with a vowel; (4) otherwise the
         * high vowel is chosen from the last vowel of the form to check, taking
         * NotObeysVowelHarmonyDuringAgglutination into account; (5) if no vowel class matched and the root is a
         * numeral, fraction or real number, the vowel is chosen from the trailing digits of the root name;
         * (6) in all remaining cases the formation is returned unchanged.</summary>
         *
         * <param name="root">Root word whose flags and name take part in the decision.</param>
         * <param name="formation">Surface form built so far; the result is derived from it.</param>
         * <param name="beginningOfSuffix">true if 'H' is the first character of the suffix.</param>
         * <param name="specialCaseTenseSuffix">true if the suffix is the special tense suffix (Hyor).</param>
         * <param name="rootWord">true if the stem is still the bare root.</param>
         * <param name="formationToCheck">Form whose vowels and last phoneme are examined.</param>
         * <returns>The formation after the 'H' meta character has been resolved.</returns>
         */
        public static string ResolveH(TxtWord root, string formation, bool beginningOfSuffix,
                                      bool specialCaseTenseSuffix, bool rootWord, string formationToCheck)
        {
            if (root.IsAbbreviation())
            {
                return formation + 'i';
            }

            if (beginningOfSuffix && TurkishLanguage.IsVowel(Word.LastPhoneme(formationToCheck)) &&
                !specialCaseTenseSuffix)
            {
                return formation;
            }

            if (specialCaseTenseSuffix)
            {
                // When the suffix is the Hyor suffix, the final vowel of the formation is replaced.
                char replacement;

                // büyülüyor, bölümlüyor, çözümlüyor, döşüyor / adresliyor, alevliyor, ateşliyor, bekliyor
                // buğuluyor, bulguluyor, çamurluyor, aforozluyor / açıklıyor, çalkalıyor, gazlıyor, gıcırdıyor
                if (rootWord && root.VowelAChangesToIDuringYSuffixation() &&
                    TryMatchHighVowel(Word.BeforeLastVowel(formationToCheck), out replacement))
                {
                    return formation.Substring(0, formation.Length - 1) + replacement;
                }

                if (TurkishLanguage.IsVowel(Word.LastPhoneme(formationToCheck)) &&
                    TryMatchHighVowel(Word.BeforeLastVowel(formationToCheck), out replacement))
                {
                    return formation.Substring(0, formation.Length - 1) + replacement;
                }
            }

            char lastVowel = Word.LastVowel(formationToCheck);

            if (TurkishLanguage.IsFrontRoundedVowel(lastVowel) ||
                (TurkishLanguage.IsBackRoundedVowel(lastVowel) && root.NotObeysVowelHarmonyDuringAgglutination()))
            {
                return formation + 'ü';
            }

            if (TurkishLanguage.IsFrontUnroundedVowel(lastVowel) ||
                (lastVowel == 'a' && root.NotObeysVowelHarmonyDuringAgglutination()))
            {
                return formation + 'i';
            }

            if (TurkishLanguage.IsBackRoundedVowel(lastVowel))
            {
                return formation + 'u';
            }

            if (TurkishLanguage.IsBackUnroundedVowel(lastVowel))
            {
                return formation + 'ı';
            }

            // No vowel class matched; only number-like roots still receive a vowel.
            if (!root.IsNumeral() && !root.IsFraction() && !root.IsReal())
            {
                return formation;
            }

            string rootName = root.GetName();

            //6'yı, 40'ı, 60'ı
            if (rootName.EndsWith("6") || rootName.EndsWith("40") || rootName.EndsWith("60") ||
                rootName.EndsWith("90"))
            {
                return formation + 'ı';
            }

            //3'ü, 4'ü, 100'ü
            if (rootName.EndsWith("3") || rootName.EndsWith("4") || rootName.EndsWith("00"))
            {
                return formation + 'ü';
            }

            //9'u, 10'u, 30'u
            if (rootName.EndsWith("9") || rootName.EndsWith("10") || rootName.EndsWith("30"))
            {
                return formation + 'u';
            }

            //2'yi, 5'i, 8'i
            return formation + 'i';
        }

        // Maps a vowel to the high vowel of the same class (front rounded -> ü, front unrounded -> i,
        // back rounded -> u, back unrounded -> ı); returns false when the character belongs to none of them.
        private static bool TryMatchHighVowel(char reference, out char highVowel)
        {
            if (TurkishLanguage.IsFrontRoundedVowel(reference))
            {
                highVowel = 'ü';
                return true;
            }

            if (TurkishLanguage.IsFrontUnroundedVowel(reference))
            {
                highVowel = 'i';
                return true;
            }

            if (TurkishLanguage.IsBackRoundedVowel(reference))
            {
                highVowel = 'u';
                return true;
            }

            if (TurkishLanguage.IsBackUnroundedVowel(reference))
            {
                highVowel = 'ı';
                return true;
            }

            highVowel = '\0';
            return false;
        }