// copy constructor
// Copies every grammar field from the given instance through its public properties.
// When grammar is null nothing is copied.
//Type Position Attribute Qualifier PersonDegree PersonGender PersonNumber Mood Lemma Root SpecialGroup
public WordPartGrammar(WordPartGrammar grammar)
{
    if (grammar == null)
    {
        return;
    }

    // owner
    word_part = grammar.WordPart;

    // classification
    type = grammar.Type;
    position = grammar.Position;
    attribute = grammar.Attribute;
    qualifier = grammar.Qualifier;

    // person
    person_degree = grammar.PersonDegree;
    person_gender = grammar.PersonGender;
    person_number = grammar.PersonNumber;

    // remaining features
    mood = grammar.Mood;
    lemma = grammar.Lemma;
    root = grammar.Root;
    special_group = grammar.SpecialGroup;
}
//Type Position Attribute Qualifier PersonDegree PersonGender PersonNumber Mood Lemma Root SpecialGroup
// copy constructor
// Duplicates the field values of an existing grammar (read via its properties).
// A null argument is accepted and results in no fields being assigned.
public WordPartGrammar(WordPartGrammar grammar)
{
    WordPartGrammar source = grammar;
    if (source == null)
    {
        return;
    }

    this.word_part = source.WordPart;
    this.type = source.Type;
    this.position = source.Position;
    this.attribute = source.Attribute;
    this.qualifier = source.Qualifier;
    this.person_degree = source.PersonDegree;
    this.person_gender = source.PersonGender;
    this.person_number = source.PersonNumber;
    this.mood = source.Mood;
    this.lemma = source.Lemma;
    this.root = source.Root;
    this.special_group = source.SpecialGroup;
}
//Type Position Attribute Qualifier PersonDegree PersonGender PersonNumber Mood Lemma Root SpecialGroup
// instance constructor
// Builds the grammar of a word part from its feature list (the items of a tag such as
//   PREFIX|w_CONJ+     STEM|POS:N|LEM:{som|ROOT:smw|M|GEN     SUFFIX|+PRON:2MP ).
// Nothing is assigned when word_part is null; only the owner is stored when grammar is null
// or has fewer than 2 items. An unknown type (first item) is stored but not parsed further.
// Throws Exception when:
//   - a PREFIX or SUFFIX list does not have exactly 2 items,
//   - a STEM key:value item has a key other than POS, LEM, ROOT, SP or MOOD,
//   - a SUFFIX item has one ':' but does not start with +PRON, or has more than one ':'.
public WordPartGrammar(WordPart word_part, List<string> grammar)
{
    if (word_part == null)
    {
        return;
    }
    this.word_part = word_part;

    if ((grammar == null) || (grammar.Count < 2))
    {
        return;
    }

    this.type = grammar[0];
    if (this.type == "PREFIX")
    {
        // known prefixes:
        // Al+ bi+ bip+ ka+ ta+ sa+ ya+ ha+ A_INTG+ A_EQ+ b_PART+ f_REM+ f_CONJ+ f_RSLT+ f_SUP+ f_CAUS+
        // l_PP+ l_EMPH+ l_PRP+ l_IMPV+ w_CONJ+ w_REM+ w_CIRC+ w_SUP+ w_PP+ w_COM+
        if (grammar.Count != 2)
        {
            throw new Exception("WordPartGrammar: Invalide PREFIX at word part " + word_part.Address);
        }
        this.position = grammar[1];
    }
    else if (this.type == "STEM")
    {
        for (int index = 1; index < grammar.Count; index++)
        {
            string[] fields = grammar[index].Split(':');
            if (fields.Length == 2)
            {
                // key:value feature
                switch (fields[0])
                {
                    case "POS":
                        // INL N PN V ADJ IMPN AC AMD ANS AVR CERT COND EXH EXL EXP FUT INC INT INTG NEG PREV PRO
                        // RES RET SUP SUR PP CONJ SUB EQ REM CIRC COM RSLT CAUS EMPH PRP IMPV PRON DEM REL T LOC
                        this.position = fields[1];
                        break;
                    case "LEM":
                        this.lemma = fields[1];
                        break;
                    case "ROOT":
                        this.root = fields[1];
                        break;
                    case "SP":
                        this.special_group = fields[1];
                        break;
                    case "MOOD":
                        this.mood = fields[1];
                        break;
                    default:
                        throw new Exception("WordPartGrammar: Invalide STEM at word part " + word_part.Address);
                }
            }
            else if (fields.Length == 1)
            {
                // bare feature: either a person token or an attribute/qualifier
                string token = fields[0];

                // A person token is an optional degree (1/2/3), then an optional gender (M/F),
                // then an optional number (S/D/P), with at least one of them present and
                // nothing else in the token.
                string degree = null;
                string gender = null;
                string number = null;
                int cursor = 0;
                if ((cursor < token.Length) && ("123".IndexOf(token[cursor]) >= 0))
                {
                    degree = token[cursor].ToString();
                    cursor++;
                }
                if ((cursor < token.Length) && ("MF".IndexOf(token[cursor]) >= 0))
                {
                    gender = token[cursor].ToString();
                    cursor++;
                }
                if ((cursor < token.Length) && ("SDP".IndexOf(token[cursor]) >= 0))
                {
                    number = token[cursor].ToString();
                    cursor++;
                }

                if ((cursor > 0) && (cursor == token.Length))
                {
                    // only the components that are present get assigned
                    if (degree != null)
                    {
                        this.person_degree = degree;
                    }
                    if (gender != null)
                    {
                        this.person_gender = gender;
                    }
                    if (number != null)
                    {
                        this.person_number = number;
                    }
                }
                else if (String.IsNullOrEmpty(this.attribute))
                {
                    // first bare feature is the attribute:
                    // VN ACT_PCPL PASS_PCPL NOM ACC GEN DEF INDEF PERF IMPF IMPV
                    this.attribute = token;
                }
                else
                {
                    // any later bare feature is the qualifier (the last one wins):
                    // NOM ACC ACT PASS (I) (II) (III) (IV) (V) (VI) (VII) (VIII) (IX) (X) (XI) (XII)
                    this.qualifier = token;
                }
            }
            // items with more than one ':' are ignored
        }
    }
    else if (this.type == "SUFFIX")
    {
        // SUFFIX|+n_EMPH  SUFFIX|+VOC  SUFFIX|+l_PP  SUFFIX|+PRON:2MP  SUFFIX|+A_SILENT
        if (grammar.Count != 2)
        {
            throw new Exception("WordPartGrammar: Invalide SUFFIX at word part " + word_part.Address);
        }

        string[] fields = grammar[1].Split(':');
        if (fields.Length == 1)
        {
            this.position = fields[0];
        }
        else if (fields.Length == 2)
        {
            if (fields[0] != "+PRON")
            {
                throw new Exception("WordPartGrammar: SUFFIX|+PRON expected at word part " + word_part.Address);
            }
            this.position = fields[0];

            string person = fields[1];
            if (person.Length == 3)
            {
                // three characters are taken positionally without validation
                this.person_degree = person[0].ToString();
                this.person_gender = person[1].ToString();
                this.person_number = person[2].ToString();
            }
            else if ((person.Length == 1) || (person.Length == 2))
            {
                char head = person[0];
                if ("123".IndexOf(head) >= 0)
                {
                    this.person_degree = head.ToString();
                }
                else if ("MF".IndexOf(head) >= 0)
                {
                    this.person_gender = head.ToString();
                }
                else if ("SDP".IndexOf(head) >= 0)
                {
                    this.person_number = head.ToString();
                }

                if (person.Length == 2)
                {
                    // the second character can only be a gender or a number
                    char tail = person[1];
                    if ("MF".IndexOf(tail) >= 0)
                    {
                        this.person_gender = tail.ToString();
                    }
                    else if ("SDP".IndexOf(tail) >= 0)
                    {
                        this.person_number = tail.ToString();
                    }
                }
            }
        }
        else
        {
            throw new Exception("WordPartGrammar: Invalide SUFFIX at word part " + word_part.Address);
        }
    }
}
// corpus word parts from http://corpus.quran.com version 0.4 - modified by Ali Adams
//
// Reads Globals.DATA_FOLDER + "/word-parts.txt" and creates a WordPart on the matching word
// of the given book for every data line. Does nothing when book is null or the file is missing.
//
// Lines that are empty or start with "#", "LOCATION" or "ADDRESS" are skipped. Every other line
// must have at least 4 tab-separated fields: address, buckwalter, tag, grammar. The address is
// chapter:verse:word:part (optionally wrapped in parentheses); grammar is '|'-separated.
//
// Throws Exception with a "LoadWordParts: " prefixed message when a line is malformed or any
// other error occurs while processing a line; the original exception is available as
// InnerException.
public static void LoadWordParts(Book book)
{
    if (book == null)
    {
        return;
    }

    string filename = Globals.DATA_FOLDER + "/" + "word-parts.txt";
    if (!File.Exists(filename))
    {
        return;
    }

    // { word index, part index } of the bismAllah word parts copied from the first verse of the book,
    // in the order they are created:
    //(1:1:1:1) bi          PP   PREFIX|bi+
    //(1:1:1:2) somi        N    STEM|POS:N|LEM:{som|ROOT:smw|M|GEN
    //(1:1:2:1) {ll~ahi     PN   STEM|POS:PN|LEM:{ll~ah|ROOT:Alh|GEN
    //(1:1:3:1) {l          DET  PREFIX|Al+
    //(1:1:3:2) r~aHoma`ni  ADJ  STEM|POS:ADJ|LEM:r~aHoma`n|ROOT:rHm|MS|GEN
    //(1:1:4:1) {l          DET  PREFIX|Al+
    //(1:1:4:2) r~aHiymi    ADJ  STEM|POS:ADJ|LEM:r~aHiym|ROOT:rHm|MS|GEN
    int[,] bismAllah_layout = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 2, 0 }, { 2, 1 }, { 3, 0 }, { 3, 1 } };

    using (StreamReader reader = File.OpenText(filename))
    {
        // number of parts seen so far in the current verse that belong to a word whose text is "و";
        // it is added to every following word number of that verse
        int waw_count = 0;
        int previous_verse_number = 0;

        while (!reader.EndOfStream)
        {
            try
            {
                string line = reader.ReadLine();
                if ((line.Length == 0) || line.StartsWith("#") || line.StartsWith("LOCATION") || line.StartsWith("ADDRESS"))
                {
                    continue; // skip header info
                }

                string[] parts = line.Split('\t');
                if (parts.Length < 4)
                {
                    throw new Exception("Invalid File Format.\r\n" + filename);
                }

                string address = parts[0];
                if (address.StartsWith("(") && address.EndsWith(")"))
                {
                    address = address.Substring(1, address.Length - 2);
                }

                string[] address_parts = address.Split(':');
                if (address_parts.Length != 4)
                {
                    throw new Exception("Invalid Location Format.\r\n" + filename);
                }

                int chapter_number = int.Parse(address_parts[0]);
                int verse_number = int.Parse(address_parts[1]);
                if (previous_verse_number != verse_number)
                {
                    waw_count = 0;
                    previous_verse_number = verse_number;
                }
                int word_number = int.Parse(address_parts[2]) + waw_count;
                int word_part_number = int.Parse(address_parts[3]);
                string buckwalter = parts[1];
                string tag = parts[2];

                Chapter chapter = book.Chapters[chapter_number - 1];
                if (chapter == null)
                {
                    continue;
                }

                Verse verse = chapter.Verses[verse_number - 1];
                if (verse == null)
                {
                    continue;
                }

                // every chapter except 1 and 9 gets the bismAllah word parts added manually
                bool needs_bismAllah = (chapter_number != 1) && (chapter_number != 9);

                if (needs_bismAllah && (verse_number == 1) && (word_number == 1) && (word_part_number == 1))
                {
                    Verse bismAllah_verse = book.Verses[0];

                    // if there is no bismAllah, add one
                    if (parts[1] != bismAllah_verse.Words[0].Parts[0].Buckwalter)
                    {
                        // NOTE(review): new List<Word>(4) is an EMPTY list with capacity 4, so this call
                        // inserts nothing and the parts below are attached to the verse's existing first
                        // four words. Kept as-is because the following "+= 4" correction appears to rely
                        // on those words already being present - confirm against the text loader.
                        verse.Words.InsertRange(0, new List<Word>(4));

                        for (int i = 0; i < bismAllah_layout.GetLength(0); i++)
                        {
                            int word_index = bismAllah_layout[i, 0];
                            int part_index = bismAllah_layout[i, 1];

                            Word target = verse.Words[word_index];
                            WordPart source = bismAllah_verse.Words[word_index].Parts[part_index];
                            WordPart word_part = new WordPart(target,
                                                              source.NumberInWord,
                                                              source.Buckwalter,
                                                              source.Tag,
                                                              new WordPartGrammar(source.Grammar));

                            if ((i == 0) && ((chapter_number == 95) || (chapter_number == 97)))
                            {
                                // add shadda { '~', 'ّ' } on B of bism
                                word_part.Buckwalter = word_part.Buckwalter.Insert(1, "~");
                            }
                        }
                    }
                }

                // correct word_number (if needed) for all subsequent chapter word_parts
                if (needs_bismAllah && (verse_number == 1))
                {
                    word_number += 4;
                }

                Word word = verse.Words[word_number - 1];
                if (word == null)
                {
                    continue;
                }

                List<string> grammar = new List<string>(parts[3].Split('|'));
                if (grammar.Count > 0)
                {
                    //(1:5:3:1) wa       CONJ PREFIX|w_CONJ+
                    //(1:5:3:2) <iy~aAka PRON STEM|POS:PRON|LEM:<iy~aA|2MS
                    if (word.Text == "و")
                    {
                        waw_count++;
                    }

                    new WordPart(word, word_part_number, buckwalter, tag, grammar);
                }
                else
                {
                    // NOTE(review): String.Split always returns at least one element, so this branch
                    // cannot be reached; an empty grammar field arrives here as a single "" item.
                    throw new Exception("Grammar field is missing.\r\n" + filename);
                }
            }
            catch (Exception ex)
            {
                // keep the original exception (type and stack trace) as InnerException
                throw new Exception("LoadWordParts: " + ex.Message, ex);
            }
        }
    }
}
// instance constructor
// Parses the feature list of one word part (tag split on '|') into the grammar fields.
//   grammar[0] is the type: PREFIX, STEM or SUFFIX (any other value is stored and nothing else is parsed)
//   PREFIX : exactly one more item, stored as the position
//   STEM   : any number of items, each either KEY:value (POS, LEM, ROOT, SP, MOOD) or a bare
//            feature (person token, attribute, qualifier); items with more than one ':' are ignored
//   SUFFIX : exactly one more item, either a bare position or +PRON:<person>
// Nothing happens when word_part is null; parsing is skipped when grammar is null or has
// fewer than 2 items. Malformed PREFIX/STEM/SUFFIX input raises Exception.
//Type Position Attribute Qualifier PersonDegree PersonGender PersonNumber Mood Lemma Root SpecialGroup
public WordPartGrammar(WordPart word_part, List<string> grammar)
{
    if (word_part == null)
    {
        return;
    }
    this.word_part = word_part;

    bool has_features = (grammar != null) && (grammar.Count > 1);
    if (!has_features)
    {
        return;
    }

    this.type = grammar[0];
    switch (this.type)
    {
        case "PREFIX":
            {
                // Al+ bi+ bip+ ka+ ta+ sa+ ya+ ha+ A_INTG+ A_EQ+ b_PART+
                // f_REM+ f_CONJ+ f_RSLT+ f_SUP+ f_CAUS+
                // l_PP+ l_EMPH+ l_PRP+ l_IMPV+
                // w_CONJ+ w_REM+ w_CIRC+ w_SUP+ w_PP+ w_COM+
                if (grammar.Count != 2)
                {
                    throw new Exception("WordPartGrammar: Invalide PREFIX at word part " + word_part.Address);
                }
                this.position = grammar[1];
            }
            break;

        case "STEM":
            {
                for (int i = 1; i < grammar.Count; i++)
                {
                    string[] pair = grammar[i].Split(':');
                    if (pair.Length > 2)
                    {
                        continue; // neither KEY:value nor a bare feature
                    }

                    if (pair.Length == 2)
                    {
                        string key = pair[0];
                        string text = pair[1];
                        if (key == "POS")
                        {
                            // INL N PN V ADJ IMPN AC AMD ANS AVR CERT COND EXH EXL EXP FUT INC INT INTG NEG
                            // PREV PRO RES RET SUP SUR PP CONJ SUB EQ REM CIRC COM RSLT CAUS EMPH PRP IMPV
                            // PRON DEM REL T LOC
                            this.position = text;
                        }
                        else if (key == "LEM")
                        {
                            this.lemma = text;
                        }
                        else if (key == "ROOT")
                        {
                            this.root = text;
                        }
                        else if (key == "SP")
                        {
                            this.special_group = text;
                        }
                        else if (key == "MOOD")
                        {
                            this.mood = text;
                        }
                        else
                        {
                            throw new Exception("WordPartGrammar: Invalide STEM at word part " + word_part.Address);
                        }
                        continue;
                    }

                    // bare feature: person token, attribute or qualifier
                    string token = pair[0];

                    // Scan the token as degree (1/2/3) -> gender (M/F) -> number (S/D/P); each
                    // component may be skipped but the order is fixed and none may repeat.
                    // stage: 0 = nothing read, 1 = degree read, 2 = gender read, 3 = number read
                    string degree = null;
                    string gender = null;
                    string number = null;
                    int stage = 0;
                    bool is_person = (token.Length > 0);
                    for (int k = 0; is_person && (k < token.Length); k++)
                    {
                        char c = token[k];
                        if ((stage < 1) && ("123".IndexOf(c) >= 0))
                        {
                            degree = c.ToString();
                            stage = 1;
                        }
                        else if ((stage < 2) && ("MF".IndexOf(c) >= 0))
                        {
                            gender = c.ToString();
                            stage = 2;
                        }
                        else if ((stage < 3) && ("SDP".IndexOf(c) >= 0))
                        {
                            number = c.ToString();
                            stage = 3;
                        }
                        else
                        {
                            is_person = false;
                        }
                    }

                    if (is_person)
                    {
                        // assign only the components the token actually carries
                        if (degree != null)
                        {
                            this.person_degree = degree;
                        }
                        if (gender != null)
                        {
                            this.person_gender = gender;
                        }
                        if (number != null)
                        {
                            this.person_number = number;
                        }
                    }
                    else
                    {
                        if (String.IsNullOrEmpty(this.attribute))
                        {
                            // VN ACT_PCPL PASS_PCPL NOM ACC GEN DEF INDEF PERF IMPF IMPV
                            this.attribute = token;
                        }
                        else
                        {
                            // NOM ACC ACT PASS (I) (II) (III) (IV) (V) (VI) (VII) (VIII) (IX) (X) (XI) (XII)
                            this.qualifier = token;
                        }
                    }
                }
            }
            break;

        case "SUFFIX":
            {
                // SUFFIX|+n_EMPH
                // SUFFIX|+VOC
                // SUFFIX|+l_PP
                // SUFFIX|+PRON:2MP
                // SUFFIX|+A_SILENT
                if (grammar.Count != 2)
                {
                    throw new Exception("WordPartGrammar: Invalide SUFFIX at word part " + word_part.Address);
                }

                string[] pair = grammar[1].Split(':');
                if (pair.Length == 1)
                {
                    this.position = pair[0];
                    break;
                }
                if (pair.Length != 2)
                {
                    throw new Exception("WordPartGrammar: Invalide SUFFIX at word part " + word_part.Address);
                }
                if (pair[0] != "+PRON")
                {
                    throw new Exception("WordPartGrammar: SUFFIX|+PRON expected at word part " + word_part.Address);
                }

                this.position = pair[0];
                string person = pair[1];
                switch (person.Length)
                {
                    case 3:
                        {
                            // taken positionally, no validation of the characters
                            this.person_degree = person[0].ToString();
                            this.person_gender = person[1].ToString();
                            this.person_number = person[2].ToString();
                        }
                        break;

                    case 2:
                        {
                            char first = person[0];
                            if ((first == '1') || (first == '2') || (first == '3'))
                            {
                                this.person_degree = first.ToString();
                            }
                            else if ((first == 'M') || (first == 'F'))
                            {
                                this.person_gender = first.ToString();
                            }
                            else if ((first == 'S') || (first == 'D') || (first == 'P'))
                            {
                                this.person_number = first.ToString();
                            }

                            // second character: gender or number only
                            char second = person[1];
                            if ((second == 'M') || (second == 'F'))
                            {
                                this.person_gender = second.ToString();
                            }
                            else if ((second == 'S') || (second == 'D') || (second == 'P'))
                            {
                                this.person_number = second.ToString();
                            }
                        }
                        break;

                    case 1:
                        {
                            char only = person[0];
                            if ((only == '1') || (only == '2') || (only == '3'))
                            {
                                this.person_degree = only.ToString();
                            }
                            else if ((only == 'M') || (only == 'F'))
                            {
                                this.person_gender = only.ToString();
                            }
                            else if ((only == 'S') || (only == 'D') || (only == 'P'))
                            {
                                this.person_number = only.ToString();
                            }
                        }
                        break;
                }
            }
            break;
    }
}