/**
 * <summary>Splits <paramref name="word"/> into syllables and stores them in <c>_syllables</c>.
 * The word is scanned left to right while consonants are collected in a small buffer:
 * <list type="bullet">
 * <item>A vowel closes a syllable made of the buffered consonants plus the vowel. When the vowel is
 * the second-to-last character of the word, the final character is attached to the same syllable
 * (whether or not it is a vowel) and skipped.</item>
 * <item>A consonant that follows exactly one buffered consonant, once at least one syllable exists,
 * moves the buffered consonant to the end of the previous syllable, because a syllable should not
 * start with two consonants. If the current consonant is the last character of the word it is
 * attached to that syllable as well (e.g. 'park').</item>
 * <item>Every consonant is then placed in the buffer.</item>
 * </list>
 * Consonants still buffered when the scan ends are not emitted as a separate syllable.</summary>
 *
 * <param name="word">Word to split into syllables.</param>
 */
public SyllableList(string word)
{
    _syllables = new List<Syllable>();
    var pendingConsonants = "";
    for (var position = 0; position < word.Length; position++)
    {
        var current = word[position];
        if (TurkishLanguage.IsVowel(current))
        {
            var syllableText = pendingConsonants + current;
            // Vowel in second-to-last position: the final character joins this syllable.
            if (position == word.Length - 2)
            {
                syllableText += word[position + 1];
                position++;
            }

            _syllables.Add(new Syllable(syllableText));
            pendingConsonants = "";
            continue;
        }

        // Two consonants in a row after an existing syllable: the first one belongs to the
        // previous syllable, since a syllable should not start with two consonants.
        if (pendingConsonants.Length == 1 && !TurkishLanguage.IsVowel(pendingConsonants[0]) &&
            _syllables.Count > 0)
        {
            var previousIndex = _syllables.Count - 1;
            var extended = _syllables[previousIndex].GetText() + pendingConsonants;
            if (position == word.Length - 1)
            {
                // The word ends with this consonant too, so it goes to the same syllable. Ex: 'park'.
                extended += current;
            }

            _syllables[previousIndex] = new Syllable(extended);
            pendingConsonants = "";
        }

        pendingConsonants += current;
    }
}
/**
 * <summary>Resolves the suffix meta-character 'Ş' against the end of <paramref name="formation"/>.
 * If the formation ends with a vowel, 'ş' is appended. If it ends with 't', that final 't' is
 * replaced by 'd'. In every other case the formation is returned unchanged.</summary>
 *
 * <param name="formation">Surface form built so far; it is indexed at its last position, so it
 * must not be empty.</param>
 * <returns>The resolved surface form.</returns>
 */
public static string ResolveSh(string formation)
{
    var ending = formation[formation.Length - 1];
    if (TurkishLanguage.IsVowel(ending))
    {
        return formation + 'ş';
    }

    return ending == 't'
        ? formation.Substring(0, formation.Length - 1) + 'd'
        : formation;
}
/**
 * <summary>Tells whether the suffix template <c>_with</c> begins with a vowel or with a consonant
 * that may drop. The result is true in either of two cases:
 * <list type="bullet">
 * <item>the first character of the template is a consonant-drop character according to
 * <c>TurkishLanguage.IsConsonantDrop</c> and the template is none of "ylA", "ysA", "ymHs", "yDH",
 * "yken";</item>
 * <item>the first character of the template is 'A', 'H', or a vowel according to
 * <c>TurkishLanguage.IsVowel</c>.</item>
 * </list></summary>
 *
 * <returns>true if the template starts with a vowel or a droppable consonant, false otherwise.</returns>
 */
private bool StartWithVowelOrConsonantDrops()
{
    if (TurkishLanguage.IsConsonantDrop(WithFirstChar()))
    {
        switch (_with)
        {
            // These templates are excluded from the consonant-drop rule.
            case "ylA":
            case "ysA":
            case "ymHs":
            case "yDH":
            case "yken":
                break;
            default:
                return true;
        }
    }

    return WithFirstChar() == 'A' || WithFirstChar() == 'H' || TurkishLanguage.IsVowel(WithFirstChar());
}
/**
 * <summary>Finds the vowel closest to the end of <paramref name="stem"/>. When the stem contains no
 * vowel, the digit ('0'-'9') closest to the end is returned instead; when it contains neither, the
 * character '0' is returned.</summary>
 *
 * <param name="stem">Text to scan from its end towards its beginning.</param>
 * <returns>The last vowel, or else the last digit, or else '0'.</returns>
 */
public static char LastVowel(string stem)
{
    // A single backward pass: a vowel wins immediately, while the first digit met on the way
    // (i.e. the rightmost one) is remembered as the fallback.
    var fallback = '0';
    var digitFound = false;
    for (var position = stem.Length - 1; position >= 0; position--)
    {
        var current = stem[position];
        if (TurkishLanguage.IsVowel(current))
        {
            return current;
        }

        if (!digitFound && current >= '0' && current <= '9')
        {
            fallback = current;
            digitFound = true;
        }
    }

    return fallback;
}
/**
 * <summary>Finds the second vowel of <paramref name="stem"/> counted from its end. If the stem has
 * only one vowel, that vowel is returned; if it has none, the character '0' is returned.</summary>
 *
 * <param name="stem">Text to scan from its end towards its beginning.</param>
 * <returns>The vowel before the last vowel, or else the only vowel, or else '0'.</returns>
 */
public static char BeforeLastVowel(string stem)
{
    var result = '0';
    var lastVowelSeen = false;
    for (var position = stem.Length - 1; position >= 0; position--)
    {
        var current = stem[position];
        if (!TurkishLanguage.IsVowel(current))
        {
            continue;
        }

        if (lastVowelSeen)
        {
            return current;
        }

        // First vowel met from the end: keep it in case there is no earlier one.
        result = current;
        lastVowelSeen = true;
    }

    return result;
}
/**
 * <summary>Builds the surface form obtained by attaching this transition's suffix template
 * (<c>_with</c>) to <paramref name="stem"/>. The method works in four stages:
 * (1) a few fixed cases are answered directly (template "0", the pronouns bu/şu/o with "ylA",
 * ben/sen with "yA"); (2) the end of the stem is adjusted according to flags queried on
 * <paramref name="root"/> (vowel change, vowel drop, 'y' insertion, consonant doubling, consonant
 * softening); (3) the index of the first template character to process is chosen, which may skip
 * a leading droppable consonant and/or an apostrophe; (4) the remaining template characters are
 * resolved one by one: 'D', 'A', 'H', 'C', 'S', 'Ş' through <c>MorphotacticEngine</c>, everything
 * else appended literally. As a side effect the field <c>_formationToCheck</c> is overwritten
 * several times; it is the form handed to the resolvers.</summary>
 *
 * <param name="root">Root word whose flags drive the stem alternations.</param>
 * <param name="stem">Current surface form the suffix is attached to. It is indexed at its last
 * (and in some branches second-to-last) position without a length check.</param>
 * <param name="startState">State the transition starts from; its name is compared against the
 * "VerbalRoot" and "ProperRoot" prefixes.</param>
 * <returns>The surface form after the suffix has been attached.</returns>
 */
public string MakeTransition(TxtWord root, string stem, State startState)
{
    // True when the stem is still the bare root (optionally followed by an apostrophe).
    var rootWord = root.GetName() == stem || root.GetName() + "'" == stem;
    var formation = stem;
    // Index of the first character of _with that the resolution loop below will process.
    var i = 0;
    // Template "0" leaves the stem untouched.
    if (_with == "0")
    {
        return(stem);
    }

    // bu/şu/o + ylA -> bununla/şununla/onunla
    if ((stem.Equals("bu") || stem.Equals("şu") || stem.Equals("o")) && rootWord && _with == "ylA")
    {
        return(stem + "nunla");
    }

    // ben/sen + yA -> bana/sana
    if (_with == "yA")
    {
        if (stem.Equals("ben"))
        {
            return("bana");
        }

        if (stem.Equals("sen"))
        {
            return("sana");
        }
    }

    _formationToCheck = stem;
    //---vowelEChangesToIDuringYSuffixation---
    //de->d(i)yor, ye->y(i)yor
    // NOTE(review): _with[1] is read without a length check; this assumes a template whose first
    // character is 'y' has at least two characters - confirm.
    if (rootWord && WithFirstChar() == 'y' && root.VowelEChangesToIDuringYSuffixation() && (_with[1] != 'H' || root.GetName() == "ye"))
    {
        formation = stem.Substring(0, stem.Length - 1) + 'i';
        _formationToCheck = formation;
    }
    else
    {
        //---lastIdropsDuringPassiveSuffixation---
        // yoğur->yoğrul, ayır->ayrıl, buyur->buyrul, çağır->çağrıl, çevir->çevril, devir->devril,
        // kavur->kavrul, kayır->kayrıl, kıvır->kıvrıl, savur->savrul, sıyır->sıyrıl, yoğur->yoğrul
        if (rootWord && (_with == "Hl" || _with == "Hn") && root.LastIdropsDuringPassiveSuffixation())
        {
            // Drop the second-to-last character of the stem, keep the last one.
            formation = stem.Substring(0, stem.Length - 2) + stem[stem.Length - 1];
            // The check form stays the unmodified stem (presumably so that the dropped vowel
            // still takes part in vowel harmony - confirm).
            _formationToCheck = stem;
        }
        else
        {
            //---showsSuRegularities---
            //karasu->karasuyu, özsu->özsuyu, ağırsu->ağırsuyu, akarsu->akarsuyu, bengisu->bengisuyu
            if (rootWord && root.ShowsSuRegularities() && StartWithVowelOrConsonantDrops() && !_with.StartsWith("y"))
            {
                formation = stem + 'y';
                _formationToCheck = formation;
            }
            else
            {
                // NOTE(review): _with[0] is read without a length check; assumes _with is not empty here.
                if (rootWord && root.DuplicatesDuringSuffixation() && !startState.GetName().StartsWith("VerbalRoot") && TurkishLanguage.IsConsonantDrop(_with[0]))
                {
                    //---duplicatesDuringSuffixation---
                    if (SoftenDuringSuffixation(root))
                    {
                        //--extra softenDuringSuffixation
                        // The final consonant is softened and doubled at the same time.
                        switch (Word.LastPhoneme(stem))
                        {
                            case 'p':
                                //tıp->tıbbı
                                formation = stem.Substring(0, stem.Length - 1) + "bb";
                                break;
                            case 't':
                                //cet->ceddi, met->meddi, ret->reddi, serhat->serhaddi, zıt->zıddı, şet->şeddi
                                formation = stem.Substring(0, stem.Length - 1) + "dd";
                                break;
                        }
                    }
                    else
                    {
                        //cer->cerri, emrihak->emrihakkı, fek->fekki, fen->fenni, had->haddi, hat->hattı,
                        // haz->hazzı, his->hissi
                        formation = stem + stem[stem.Length - 1];
                    }

                    _formationToCheck = formation;
                }
                else
                {
                    if (rootWord && root.LastIdropsDuringSuffixation() && !startState.GetName().StartsWith("VerbalRoot") && !startState.GetName().StartsWith("ProperRoot") && StartWithVowelOrConsonantDrops())
                    {
                        //---lastIdropsDuringSuffixation---
                        if (SoftenDuringSuffixation(root))
                        {
                            //---softenDuringSuffixation---
                            // The last two characters are replaced by the softened consonant.
                            switch (Word.LastPhoneme(stem))
                            {
                                case 'p':
                                    //hizip->hizbi, kayıp->kaybı, kayıt->kaydı, kutup->kutbu
                                    formation = stem.Substring(0, stem.Length - 2) + 'b';
                                    break;
                                case 't':
                                    //akit->akdi, ahit->ahdi, lahit->lahdi, nakit->nakdi, vecit->vecdi
                                    formation = stem.Substring(0, stem.Length - 2) + 'd';
                                    break;
                                case 'ç':
                                    //eviç->evci, nesiç->nesci
                                    formation = stem.Substring(0, stem.Length - 2) + 'c';
                                    break;
                            }
                        }
                        else
                        {
                            //sarıağız->sarıağzı, zehir->zehri, zikir->zikri, nutuk->nutku, omuz->omzu, ömür->ömrü
                            //lütuf->lütfu, metin->metni, kavim->kavmi, kasıt->kastı
                            formation = stem.Substring(0, stem.Length - 2) + stem[stem.Length - 1];
                        }

                        // As in the passive case above, the check form stays the unmodified stem.
                        _formationToCheck = stem;
                    }
                    else
                    {
                        switch (Word.LastPhoneme(stem))
                        {
                            //---nounSoftenDuringSuffixation or verbSoftenDuringSuffixation
                            case 'p':
                                //adap->adabı, amip->amibi, azap->azabı, gazap->gazabı
                                if (StartWithVowelOrConsonantDrops() && rootWord && SoftenDuringSuffixation(root))
                                {
                                    formation = stem.Substring(0, stem.Length - 1) + 'b';
                                }

                                break;
                            case 't':
                                //abat->abadı, adet->adedi, akort->akordu, armut->armudu
                                //affet->affedi, yoket->yokedi, sabret->sabredi, rakset->raksedi
                                if (StartWithVowelOrConsonantDrops() && rootWord && SoftenDuringSuffixation(root))
                                {
                                    formation = stem.Substring(0, stem.Length - 1) + 'd';
                                }

                                break;
                            case 'ç':
                                //ağaç->ağacı, almaç->almacı, akaç->akacı, avuç->avucu
                                if (StartWithVowelOrConsonantDrops() && rootWord && SoftenDuringSuffixation(root))
                                {
                                    formation = stem.Substring(0, stem.Length - 1) + 'c';
                                }

                                break;
                            case 'g':
                                //arkeolog->arkeoloğu, filolog->filoloğu, minerolog->mineroloğu
                                if (StartWithVowelOrConsonantDrops() && rootWord && SoftenDuringSuffixation(root))
                                {
                                    formation = stem.Substring(0, stem.Length - 1) + 'ğ';
                                }

                                break;
                            case 'k':
                                //ahenk->ahengi, künk->küngü, renk->rengi, pelesenk->pelesengi
                                if (StartWithVowelOrConsonantDrops() && rootWord && root.EndingKChangesIntoG() && !root.IsProperNoun())
                                {
                                    formation = stem.Substring(0, stem.Length - 1) + 'g';
                                }
                                else
                                {
                                    //ablak->ablağı, küllük->küllüğü, kitaplık->kitaplığı, evcilik->evciliği
                                    // Unlike the other cases, k->ğ also applies to derived (non-root) stems.
                                    // NOTE(review): this compares startState.ToString() for equality with
                                    // "ProperRoot", while the checks above use GetName().StartsWith(...);
                                    // confirm that State.ToString() returns the state name.
                                    if (StartWithVowelOrConsonantDrops() && (!rootWord || (SoftenDuringSuffixation(root) && (!root.IsProperNoun() || !startState.ToString().Equals("ProperRoot")))))
                                    {
                                        formation = stem.Substring(0, stem.Length - 1) + 'ğ';
                                    }
                                }

                                break;
                        }

                        _formationToCheck = formation;
                    }
                }
            }
        }
    }

    // Decide where processing of the template starts. For numeric roots whose name ends with one
    // of the listed digit patterns, and whose stem does not end with a vowel, a leading
    // consonant-drop character of the template is skipped.
    if (TurkishLanguage.IsConsonantDrop(WithFirstChar()) && !TurkishLanguage.IsVowel(stem[stem.Length - 1]) && (root.IsNumeral() || root.IsReal() || root.IsFraction() || root.IsTime() || root.IsDate() || root.IsPercent() || root.IsRange()) && (root.GetName().EndsWith("1") || root.GetName().EndsWith("3") || root.GetName().EndsWith("4") || root.GetName().EndsWith("5") || root.GetName().EndsWith("8") || root.GetName().EndsWith("9") || root.GetName().EndsWith("10") || root.GetName().EndsWith("30") || root.GetName().EndsWith("40") || root.GetName().EndsWith("60") || root.GetName().EndsWith("70") || root.GetName().EndsWith("80") || root.GetName().EndsWith("90") || root.GetName().EndsWith("00")))
    {
        if (_with[0] == '\'')
        {
            // Copy the apostrophe, then skip both it and the character after it.
            formation += '\'';
            i = 2;
        }
        else
        {
            i = 1;
        }
    }
    else
    {
        // Template starts with a droppable consonant after a consonant-final stem, or the root
        // is flagged by ConsonantSMayInsertedDuringPossesiveSuffixation: skip the first template
        // character as well.
        if ((TurkishLanguage.IsConsonantDrop(WithFirstChar()) && TurkishLanguage.IsConsonant(Word.LastPhoneme(stem))) || (rootWord && root.ConsonantSMayInsertedDuringPossesiveSuffixation()))
        {
            if (_with[0] == '\'')
            {
                formation += '\'';
                // For abbreviations only the apostrophe is skipped; otherwise the character
                // following it is skipped too.
                if (root.IsAbbreviation())
                {
                    i = 1;
                }
                else
                {
                    i = 2;
                }
            }
            else
            {
                i = 1;
            }
        }
    }

    // Resolve the remaining template characters one at a time. Upper-case meta-characters are
    // delegated to MorphotacticEngine; anything else is appended literally.
    for (; i < _with.Length; i++)
    {
        switch (_with[i])
        {
            case 'D':
                formation = MorphotacticEngine.ResolveD(root, formation, _formationToCheck);
                break;
            case 'A':
                formation = MorphotacticEngine.ResolveA(root, formation, rootWord, _formationToCheck);
                break;
            case 'H':
                // The third argument tells ResolveH whether this 'H' is the first character of
                // the suffix: index 0 normally, index 1 when the template starts with an apostrophe.
                if (_with[0] != '\'')
                {
                    formation = MorphotacticEngine.ResolveH(root, formation, i == 0, _with.StartsWith("Hyor"), rootWord, _formationToCheck);
                }
                else
                {
                    formation = MorphotacticEngine.ResolveH(root, formation, i == 1, false, rootWord, _formationToCheck);
                }

                break;
            case 'C':
                formation = MorphotacticEngine.ResolveC(formation, _formationToCheck);
                break;
            case 'S':
                formation = MorphotacticEngine.ResolveS(formation);
                break;
            case 'Ş':
                formation = MorphotacticEngine.ResolveSh(formation);
                break;
            default:
                // A lower-case 's' in final position of the template is written as 'ş';
                // every other character is copied as is.
                if (i == _with.Length - 1 && _with[i] == 's')
                {
                    formation += 'ş';
                }
                else
                {
                    formation += _with[i];
                }

                break;
        }

        // The next template character is resolved against what has been built so far.
        _formationToCheck = formation;
    }

    return(formation);
}
/**
 * <summary>Resolves the suffix meta-character 'H' to a concrete vowel ('ü', 'i', 'u' or 'ı') or to
 * nothing, and returns the extended formation. The rules are tried in this order:
 * <list type="number">
 * <item>Abbreviation roots always receive 'i'.</item>
 * <item>If 'H' opens the suffix, the checked form ends with a vowel and the suffix is not the
 * special tense suffix, nothing is added.</item>
 * <item>For the special tense suffix, when either the stem is the root and the root reports
 * <c>VowelAChangesToIDuringYSuffixation</c>, or the checked form ends with a vowel, the last
 * character of the formation is replaced by the vowel matching the vowel before the last vowel
 * of the checked form.</item>
 * <item>Otherwise a vowel is appended according to the last vowel of the checked form; roots
 * reporting <c>NotObeysVowelHarmonyDuringAgglutination</c> take the front counterpart after a
 * back rounded vowel or 'a'.</item>
 * <item>If no vowel class matched and the root is a numeral, fraction or real number, the vowel
 * is chosen from the trailing digits of the root name.</item>
 * <item>In all remaining cases the formation is returned unchanged.</item>
 * </list></summary>
 *
 * <param name="root">Root word whose flags and name are consulted.</param>
 * <param name="formation">Surface form built so far.</param>
 * <param name="beginningOfSuffix">true when this 'H' is the first character of the suffix.</param>
 * <param name="specialCaseTenseSuffix">true when the suffix is the "Hyor" suffix.</param>
 * <param name="rootWord">true when the stem being suffixed is the root itself.</param>
 * <param name="formationToCheck">Form whose vowels and last phoneme decide the outcome.</param>
 * <returns>The formation with 'H' resolved.</returns>
 */
public static string ResolveH(TxtWord root, string formation, bool beginningOfSuffix,
    bool specialCaseTenseSuffix, bool rootWord, string formationToCheck)
{
    if (root.IsAbbreviation())
    {
        return formation + 'i';
    }

    if (beginningOfSuffix && TurkishLanguage.IsVowel(Word.LastPhoneme(formationToCheck)) &&
        !specialCaseTenseSuffix)
    {
        return formation;
    }

    if (specialCaseTenseSuffix)
    {
        // The suffix is the Hyor suffix.
        if (rootWord && root.VowelAChangesToIDuringYSuffixation())
        {
            // ü: büyülüyor, bölümlüyor, çözümlüyor, döşüyor
            // i: adresliyor, alevliyor, ateşliyor, bekliyor
            // u: buğuluyor, bulguluyor, çamurluyor, aforozluyor
            // ı: açıklıyor, çalkalıyor, gazlıyor, gıcırdıyor
            var narrowed = HighVowelInHarmonyWith(Word.BeforeLastVowel(formationToCheck));
            if (narrowed != '\0')
            {
                return formation.Substring(0, formation.Length - 1) + narrowed;
            }
        }

        if (TurkishLanguage.IsVowel(Word.LastPhoneme(formationToCheck)))
        {
            var narrowed = HighVowelInHarmonyWith(Word.BeforeLastVowel(formationToCheck));
            if (narrowed != '\0')
            {
                return formation.Substring(0, formation.Length - 1) + narrowed;
            }
        }
    }

    var lastVowel = Word.LastVowel(formationToCheck);
    if (TurkishLanguage.IsFrontRoundedVowel(lastVowel) ||
        (TurkishLanguage.IsBackRoundedVowel(lastVowel) && root.NotObeysVowelHarmonyDuringAgglutination()))
    {
        return formation + 'ü';
    }

    if (TurkishLanguage.IsFrontUnroundedVowel(lastVowel) ||
        (lastVowel == 'a' && root.NotObeysVowelHarmonyDuringAgglutination()))
    {
        return formation + 'i';
    }

    if (TurkishLanguage.IsBackRoundedVowel(lastVowel))
    {
        return formation + 'u';
    }

    if (TurkishLanguage.IsBackUnroundedVowel(lastVowel))
    {
        return formation + 'ı';
    }

    if (!(root.IsNumeral() || root.IsFraction() || root.IsReal()))
    {
        return formation;
    }

    // Numeric roots: the vowel follows the trailing digits of the number.
    var numberText = root.GetName();
    if (numberText.EndsWith("6") || numberText.EndsWith("40") || numberText.EndsWith("60") ||
        numberText.EndsWith("90"))
    {
        //6'yı, 40'ı, 60'ı
        return formation + 'ı';
    }

    if (numberText.EndsWith("3") || numberText.EndsWith("4") || numberText.EndsWith("00"))
    {
        //3'ü, 4'ü, 100'ü
        return formation + 'ü';
    }

    if (numberText.EndsWith("9") || numberText.EndsWith("10") || numberText.EndsWith("30"))
    {
        //9'u, 10'u, 30'u
        return formation + 'u';
    }

    //2'yi, 5'i, 8'i
    return formation + 'i';
}

/**
 * <summary>Maps a vowel to 'ü', 'i', 'u' or 'ı' depending on whether it is front rounded, front
 * unrounded, back rounded or back unrounded, tested in that order.</summary>
 *
 * <param name="vowel">Character to classify.</param>
 * <returns>The matching vowel, or '\0' when the character falls in none of the four classes.</returns>
 */
private static char HighVowelInHarmonyWith(char vowel)
{
    if (TurkishLanguage.IsFrontRoundedVowel(vowel))
    {
        return 'ü';
    }

    if (TurkishLanguage.IsFrontUnroundedVowel(vowel))
    {
        return 'i';
    }

    if (TurkishLanguage.IsBackRoundedVowel(vowel))
    {
        return 'u';
    }

    if (TurkishLanguage.IsBackUnroundedVowel(vowel))
    {
        return 'ı';
    }

    return '\0';
}