/// <summary>
/// Searches one translation of the given verses for <paramref name="text"/>.
/// </summary>
/// <remarks>
/// For <c>Anywhere</c>, <c>AtStart</c>, <c>AtMiddle</c> and <c>AtEnd</c> a regular expression is
/// built from <paramref name="text"/> (inserted as-is, i.e. not escaped) and run against each verse.
/// For <c>AllWords</c> and <c>AnyWord</c> the text is split on whitespace; words carrying a leading
/// or trailing '-' must be absent, words carrying a leading or trailing '+' must be present, and the
/// remaining words are tested with <c>ContainsWordsOf</c> / <c>ContainsWordOf</c>.
/// Any exception raised while searching is traced and the phrases collected so far are returned.
/// </remarks>
/// <param name="translation">Key into <c>verse.Translations</c> selecting the text to search.</param>
/// <param name="source">Verses to search; null or empty yields an empty result.</param>
/// <param name="find_scope">Not used by this overload.</param>
/// <param name="current_selection">Not used by this overload.</param>
/// <param name="previous_result">Not used by this overload.</param>
/// <param name="text">Search text; null or empty yields an empty result.</param>
/// <param name="text_location">Where the text must occur, or the word-search mode.</param>
/// <param name="case_sensitive">When false, <c>RegexOptions.IgnoreCase</c> is used for regex searches.</param>
/// <param name="wordness">Whole word, part of word, or any; other values select the empty-line pattern.</param>
/// <param name="multiplicity">-1 accepts any verse with at least one match; otherwise a verse is accepted
/// when its match count is at least this value (a verse accepted with zero matches yields a
/// verse-level phrase at position 0 with empty text).</param>
/// <param name="at_word_start">When true, regex matches must begin at a word boundary.</param>
/// <returns>The phrases found; never null.</returns>
private static List<Phrase> DoFindPhrases(string translation, List<Verse> source, FindScope find_scope, Selection current_selection, List<Verse> previous_result, string text, TextLocation text_location, bool case_sensitive, TextWordness wordness, int multiplicity, bool at_word_start)
{
    List<Phrase> result = new List<Phrase>();
    if ((source == null) || (source.Count == 0) || String.IsNullOrEmpty(text))
    {
        return result;
    }

    RegexOptions regex_options = case_sensitive ? RegexOptions.None : RegexOptions.IgnoreCase;
    if (text.IsArabic()) // Arabic letters in translation (Emlaaei, Urdu, Farsi, etc.)
    {
        regex_options |= RegexOptions.RightToLeft;
    }

    try
    {
        string pattern_empty_line = @"^$";
        string pattern_whole_line = "(" + @"^" + text + @"$" + ")";

        string pattern_any_with_prefix = "(" + @"\S+?" + text + ")";
        string pattern_any_with_prefix_and_suffix = "(" + @"\S+?" + text + @"\S+?" + ")";
        string pattern_any_with_suffix = "(" + text + @"\S+?" + ")";

        string pattern_word_with_prefix = "(" + pattern_any_with_prefix + @"\b" + ")";
        string pattern_word_with_prefix_and_suffix = "(" + pattern_any_with_prefix_and_suffix + ")";
        // NOTE(review): the last alternative has no leading \b, unlike the trailing \b of the
        // prefix alternative - confirm this asymmetry is intended.
        string pattern_word_with_any_fixes = "(" + pattern_word_with_prefix + "|" + pattern_word_with_prefix_and_suffix + "|" + pattern_any_with_suffix + ")";

        // Part of word (needed up-front because "anywhere" is the union of the three)
        string pattern_part_word_at_start = "(" + @"^" + pattern_word_with_any_fixes + ")";
        string pattern_part_word_at_middle = "(" + @"(?<!^)" + pattern_word_with_any_fixes + @"(?!$)" + ")";
        string pattern_part_word_at_end = "(" + pattern_word_with_any_fixes + @"$" + ")";

        // candidate patterns for the requested location, one per wordness
        string whole_word_pattern = null;
        string part_word_pattern = null;
        string any_pattern = null;

        bool is_word_search = false;
        List<string> negative_words = new List<string>();
        List<string> positive_words = new List<string>();
        List<string> unsigned_words = new List<string>();

        switch (text_location)
        {
            case TextLocation.Anywhere:
                {
                    whole_word_pattern = "(" + pattern_whole_line + "|" + @"\b" + text + @"\b" + ")";
                    part_word_pattern = "(" + pattern_part_word_at_start + "|" + pattern_part_word_at_middle + "|" + pattern_part_word_at_end + ")";
                    any_pattern = text;
                }
                break;
            case TextLocation.AtStart:
                {
                    whole_word_pattern = "(" + pattern_whole_line + "|" + @"^" + text + @"\b" + ")";
                    part_word_pattern = pattern_part_word_at_start;
                    any_pattern = "(" + pattern_whole_line + "|" + @"^" + text + ")";
                }
                break;
            case TextLocation.AtMiddle:
                {
                    whole_word_pattern = "(" + pattern_whole_line + "|" + @"(?<!^)" + @"\b" + text + @"\b" + @"(?!$)" + ")";
                    part_word_pattern = pattern_part_word_at_middle;
                    any_pattern = "(" + pattern_whole_line + "|" + @"(?<!^)" + text + @"(?!$)" + ")";
                }
                break;
            case TextLocation.AtEnd:
                {
                    whole_word_pattern = "(" + pattern_whole_line + "|" + @"\b" + text + @"$" + ")";
                    part_word_pattern = pattern_part_word_at_end;
                    any_pattern = "(" + pattern_whole_line + "|" + text + @"$" + ")";
                }
                break;
            case TextLocation.AllWords:
            case TextLocation.AnyWord:
                {
                    is_word_search = true;

                    // collapse every whitespace run to a single space before splitting
                    string normalized_text = Regex.Replace(text.Trim(), @"\s+", " ");
                    foreach (string pattern_word in normalized_text.Split())
                    {
                        if (pattern_word.StartsWith("-"))
                        {
                            negative_words.Add(pattern_word.Substring(1));
                        }
                        else if (pattern_word.EndsWith("-"))
                        {
                            negative_words.Add(pattern_word.Substring(0, pattern_word.Length - 1));
                        }
                        else if (pattern_word.StartsWith("+"))
                        {
                            positive_words.Add(pattern_word.Substring(1));
                        }
                        else if (pattern_word.EndsWith("+"))
                        {
                            positive_words.Add(pattern_word.Substring(0, pattern_word.Length - 1));
                        }
                        else
                        {
                            unsigned_words.Add(pattern_word);
                        }
                    }
                }
                break;
            default:
                {
                    return result; // unknown location: nothing to search
                }
        }

        string pattern = null;
        if (!is_word_search)
        {
            if (wordness == TextWordness.WholeWord)
            {
                pattern = whole_word_pattern;
            }
            else if (wordness == TextWordness.PartOfWord)
            {
                pattern = part_word_pattern;
            }
            else if (wordness == TextWordness.Any)
            {
                pattern = any_pattern;
            }
            else
            {
                pattern = pattern_empty_line;
            }

            if (at_word_start)
            {
                // Wrap the FINISHED pattern so the word boundary applies to all of it.
                // (Previously the look-behind was built around a still-null pattern and the
                // real pattern was appended after an empty group.)
                pattern = @"(?<=\b)(" + pattern + @")"; // positive lookbehind
            }
        }

        // do actual search
        foreach (Verse verse in source)
        {
            string verse_text = verse.Translations[translation];

            if (is_word_search)
            {
                // a verse is rejected when it contains any '-' word or lacks any '+' word
                bool rejected = false;
                foreach (string negative_word in negative_words)
                {
                    if (verse_text.Contains(negative_word))
                    {
                        rejected = true;
                        break;
                    }
                }
                if (!rejected)
                {
                    foreach (string positive_word in positive_words)
                    {
                        if (!verse_text.Contains(positive_word))
                        {
                            rejected = true;
                            break;
                        }
                    }
                }
                if (rejected) continue;

                bool accepted;
                if (text_location == TextLocation.AllWords)
                {
                    accepted = (unsigned_words.Count == 0)
                            || (verse_text.ContainsWordsOf(unsigned_words));
                }
                else // TextLocation.AnyWord
                {
                    // passing the signed-word filters is already sufficient when there are any
                    accepted = (negative_words.Count > 0)
                            || (positive_words.Count > 0)
                            || (unsigned_words.Count == 0)
                            || (verse_text.ContainsWordOf(unsigned_words));
                }

                if (accepted)
                {
                    result.Add(new Phrase(verse, 0, ""));
                }
            }
            else // at start, middle, end, or anywhere
            {
                MatchCollection matches = Regex.Matches(verse_text, pattern, regex_options);
                if (multiplicity != -1) // with multiplicity
                {
                    if (matches.Count >= multiplicity)
                    {
                        if (matches.Count > 0)
                        {
                            result.AddRange(BuildPhrasesAndOriginify(verse, matches));
                        }
                        else
                        {
                            result.Add(new Phrase(verse, 0, ""));
                        }
                    }
                }
                else // without multiplicity
                {
                    if (matches.Count > 0)
                    {
                        result.AddRange(BuildPhrasesAndOriginify(verse, matches));
                    }
                }
            }
        } // end for
    }
    catch (System.Exception ex)
    {
        // best-effort search: keep whatever was collected, but leave a trace for diagnosis
        System.Diagnostics.Debug.WriteLine("DoFindPhrases (translation) failed: " + ex);
    }

    return result;
}
// find by text - Exact
/// <summary>
/// Entry point for exact text search.
/// </summary>
/// <remarks>
/// For <c>LanguageType.Arabic</c> the search is forwarded once, with the Emlaaei fallback enabled.
/// For <c>LanguageType.Translation</c> the search is repeated for every translation key found on
/// the first verse of <paramref name="book"/> (the <paramref name="translation"/> argument is not
/// consulted in that case) and the results are concatenated. Any other language type yields an
/// empty list.
/// </remarks>
/// <returns>The phrases found by the underlying search.</returns>
public static List<Phrase> FindPhrases(Book book, FindScope find_scope, Selection current_selection, List<Verse> previous_result, string text, LanguageType language_type, string translation, TextLocation text_location, bool case_sensitive, TextWordness wordness, int multiplicity, bool at_word_start, bool with_diacritics)
{
    if (language_type == LanguageType.Arabic)
    {
        return DoFindPhrases(book, find_scope, current_selection, previous_result, text, language_type, translation, text_location, case_sensitive, wordness, multiplicity, at_word_start, with_diacritics, true);
    }

    List<Phrase> all_phrases = new List<Phrase>();

    if (language_type != LanguageType.Translation)
    {
        return all_phrases;
    }

    if ((book.Verses == null) || (book.Verses.Count <= 0))
    {
        return all_phrases;
    }

    // the first verse supplies the list of available translation keys
    foreach (string translation_key in book.Verses[0].Translations.Keys)
    {
        all_phrases.AddRange(DoFindPhrases(book, find_scope, current_selection, previous_result, text, language_type, translation_key, text_location, case_sensitive, wordness, multiplicity, at_word_start, with_diacritics, false));
    }

    return all_phrases;
}
/// <summary>
/// Searches the Arabic text of the given verses for <paramref name="text"/> in up to three passes.
/// </summary>
/// <remarks>
/// 1. When <paramref name="with_diacritics"/> is true, the original verse/word text is searched.
/// 2. If nothing has been found so far, <paramref name="text"/> and the verse/word text are
///    simplified to <c>s_numerology_system.TextMode</c> and searched again.
/// 3. If still nothing has been found, <paramref name="multiplicity"/> is not 0 and
///    <paramref name="try_emlaaei_if_nothing_found"/> is true, the Simplify29-ed
///    <c>DEFAULT_EMLAAEI_TEXT</c> translation is searched; hits are reported as verse-level
///    phrases (position 0, empty text).
/// Any exception raised while searching is traced and the phrases collected so far are returned.
/// </remarks>
/// <param name="source">Verses to search; null or empty yields an empty result.</param>
/// <param name="find_scope">Not used by this overload.</param>
/// <param name="current_selection">Not used by this overload.</param>
/// <param name="previous_result">Not used by this overload.</param>
/// <param name="text">Search text; null or empty yields an empty result.</param>
/// <param name="text_location">Where the text must occur, or the word-search mode.</param>
/// <param name="wordness">How a search word is compared with a verse word.</param>
/// <param name="multiplicity">-1 accepts any verse with at least one regex match; otherwise a verse
/// is accepted when its match count is at least this value.</param>
/// <param name="at_word_start">Forwarded to <c>BuildPattern</c>.</param>
/// <param name="with_diacritics">Whether to try the unsimplified text first.</param>
/// <param name="try_emlaaei_if_nothing_found">Whether to fall back to the Emlaaei text.</param>
/// <returns>The phrases found; never null.</returns>
private static List<Phrase> DoFindPhrases(List<Verse> source, FindScope find_scope, Selection current_selection, List<Verse> previous_result, string text, TextLocation text_location, TextWordness wordness, int multiplicity, bool at_word_start, bool with_diacritics, bool try_emlaaei_if_nothing_found)
{
    List<Phrase> result = new List<Phrase>();
    if ((source == null) || (source.Count == 0) || String.IsNullOrEmpty(text))
    {
        return result;
    }

    RegexOptions regex_options = RegexOptions.IgnoreCase | RegexOptions.RightToLeft;
    string pattern = null;
    List<string> unsigned_words = null;
    List<string> positive_words = null;
    List<string> negative_words = null;

    try
    {
        if (with_diacritics)
        {
            // search in original text first (without simplification)
            pattern = BuildPattern(text, text_location, wordness, at_word_start, out unsigned_words, out positive_words, out negative_words);
            if (!String.IsNullOrEmpty(pattern))
            {
                SearchArabicVerses(source, pattern, unsigned_words, positive_words, negative_words, text_location, wordness, multiplicity, regex_options, false, result);
            }
        }

        // DON'T use else{}: if with_diacritics found nothing, try simplified text before emlaaei text
        if (result.Count == 0)
        {
            // simplify all text_modes (Original will be simplified29 automatically)
            text = text.SimplifyTo(s_numerology_system.TextMode);
            if (!String.IsNullOrEmpty(text)) // re-test in case text was just harakaat which is simplified to nothing
            {
                pattern = BuildPattern(text, text_location, wordness, at_word_start, out unsigned_words, out positive_words, out negative_words);
                if (!String.IsNullOrEmpty(pattern))
                {
                    SearchArabicVerses(source, pattern, unsigned_words, positive_words, negative_words, text_location, wordness, multiplicity, regex_options, true, result);
                }
            }
        }

        // if nothing found, search in emlaaei
        if ((multiplicity != 0) && (result.Count == 0) && try_emlaaei_if_nothing_found)
        {
            // NOTE(review): pattern and the word lists can still be null here (with_diacritics false
            // and text simplified to nothing); the resulting exception is handled by the catch below.
            SearchEmlaaeiVerses(source, pattern, unsigned_words, positive_words, negative_words, text_location, multiplicity, regex_options, result);
        }
    }
    catch (System.Exception ex)
    {
        // best-effort search: keep whatever was collected, but leave a trace for diagnosis
        System.Diagnostics.Debug.WriteLine("DoFindPhrases (Arabic) failed: " + ex);
    }

    return result;
}

// Runs one pass over the verses' own text; simplify selects the TextMode-simplified comparison.
private static void SearchArabicVerses(List<Verse> source, string pattern, List<string> unsigned_words, List<string> positive_words, List<string> negative_words, TextLocation text_location, TextWordness wordness, int multiplicity, RegexOptions regex_options, bool simplify, List<Phrase> result)
{
    foreach (Verse verse in source)
    {
        // process negative_words: any hit disqualifies the verse
        bool has_negative_word = false;
        foreach (string negative_word in negative_words)
        {
            if (VerseContainsWord(verse, negative_word, wordness, simplify))
            {
                has_negative_word = true;
                break;
            }
        }
        if (has_negative_word) continue; // next verse

        // process positive_words: every one of them must be present
        bool lacks_positive_word = false;
        foreach (string positive_word in positive_words)
        {
            if (!VerseContainsWord(verse, positive_word, wordness, simplify))
            {
                lacks_positive_word = true;
                break;
            }
        }
        if (lacks_positive_word) continue; // next verse

        // both negative and positive conditions have been met; process unsigned_words
        if (text_location == TextLocation.AllWords)
        {
            bool all_present = true;
            foreach (string unsigned_word in unsigned_words)
            {
                if (!VerseContainsWord(verse, unsigned_word, wordness, simplify))
                {
                    all_present = false;
                    break;
                }
            }
            if (!all_present) continue; // verse failed test, so skip it

            AddPhrasesOfMatchingWords(verse, positive_words, wordness, simplify, result);
            AddPhrasesOfMatchingWords(verse, unsigned_words, wordness, simplify, result);
        }
        else if (text_location == TextLocation.AnyWord)
        {
            bool any_present = false;
            foreach (string unsigned_word in unsigned_words)
            {
                if (VerseContainsWord(verse, unsigned_word, wordness, simplify))
                {
                    any_present = true; // 1 unsigned word in verse is enough
                    break;
                }
            }
            if (!any_present) continue; // verse failed test, so skip it

            AddPhrasesOfMatchingWords(verse, positive_words, wordness, simplify, result);
            AddPhrasesOfMatchingWords(verse, unsigned_words, wordness, simplify, result);
        }
        else // at start, middle, end, or anywhere
        {
            string verse_text = simplify ? verse.Text.SimplifyTo(s_numerology_system.TextMode) : verse.Text;
            //??? whole_word still needs verification in border cases in all text_modes
            MatchCollection matches = Regex.Matches(verse_text, pattern, regex_options);

            bool accepted = (multiplicity != -1) ? (matches.Count >= multiplicity) : (matches.Count > 0);
            if (!accepted) continue;

            if (matches.Count > 0)
            {
                // matches on simplified text are mapped via BuildPhrasesAndOriginify, as before
                if (simplify)
                {
                    result.AddRange(BuildPhrasesAndOriginify(verse, matches));
                }
                else
                {
                    result.AddRange(BuildPhrases(verse, matches));
                }
            }
            else
            {
                // accepted with zero matches (only possible with an explicit multiplicity)
                result.Add(new Phrase(verse, 0, ""));
            }
        }
    } // end for
}

// Runs the fallback pass over the Simplify29-ed DEFAULT_EMLAAEI_TEXT translation of each verse.
private static void SearchEmlaaeiVerses(List<Verse> source, string pattern, List<string> unsigned_words, List<string> positive_words, List<string> negative_words, TextLocation text_location, int multiplicity, RegexOptions regex_options, List<Phrase> result)
{
    // always simplify29 for emlaaei comparison
    pattern = CollapseDoubleSpaces(pattern.Simplify29().Trim());

    foreach (Verse verse in source)
    {
        string emlaaei_text = CollapseDoubleSpaces(verse.Translations[DEFAULT_EMLAAEI_TEXT].Simplify29().Trim());

        bool accepted;
        if ((text_location == TextLocation.AllWords) || (text_location == TextLocation.AnyWord))
        {
            bool rejected = false;
            foreach (string negative_word in negative_words)
            {
                if (emlaaei_text.Contains(negative_word))
                {
                    rejected = true;
                    break;
                }
            }
            if (!rejected)
            {
                foreach (string positive_word in positive_words)
                {
                    if (!emlaaei_text.Contains(positive_word))
                    {
                        rejected = true;
                        break;
                    }
                }
            }
            if (rejected) continue;

            if (text_location == TextLocation.AllWords)
            {
                accepted = (unsigned_words.Count == 0)
                        || (emlaaei_text.ContainsWordsOf(unsigned_words));
            }
            else // TextLocation.AnyWord
            {
                accepted = (negative_words.Count > 0)
                        || (positive_words.Count > 0)
                        || (unsigned_words.Count == 0)
                        || (emlaaei_text.ContainsWordOf(unsigned_words));
            }
        }
        else // at start, middle, end, or anywhere
        {
            MatchCollection matches = Regex.Matches(emlaaei_text, pattern, regex_options);
            accepted = (multiplicity != -1) ? (matches.Count >= multiplicity) : (matches.Count > 0);
        }

        if (accepted)
        {
            // don't colorize emlaaei matches to let user know this is unofficial spelling
            result.Add(new Phrase(verse, 0, ""));
        }
    } // end for
}

// True when at least one word of the verse matches target under the given wordness.
private static bool VerseContainsWord(Verse verse, string target, TextWordness wordness, bool simplify)
{
    foreach (Word word in verse.Words)
    {
        if (IsWordnessMatch(GetSearchableWordText(word, simplify), target, wordness))
        {
            return true;
        }
    }
    return false;
}

// Adds one phrase (original word position and text) for every verse word matching any target.
private static void AddPhrasesOfMatchingWords(Verse verse, List<string> targets, TextWordness wordness, bool simplify, List<Phrase> result)
{
    foreach (string target in targets)
    {
        foreach (Word word in verse.Words)
        {
            // no break: a target may occur in more than one word
            if (IsWordnessMatch(GetSearchableWordText(word, simplify), target, wordness))
            {
                result.Add(new Phrase(verse, word.Position, word.Text));
            }
        }
    }
}

// Returns the word text to compare against: simplified to the current TextMode, or as-is.
private static string GetSearchableWordText(Word word, bool simplify)
{
    return simplify ? word.Text.SimplifyTo(s_numerology_system.TextMode) : word.Text;
}

// Compares a verse word with a search word: Any = contains, PartOfWord = contains and longer, WholeWord = equal.
private static bool IsWordnessMatch(string word_text, string target, TextWordness wordness)
{
    if (wordness == TextWordness.Any)
    {
        return word_text.Contains(target);
    }
    if (wordness == TextWordness.PartOfWord)
    {
        return (word_text.Contains(target)) && (word_text.Length > target.Length);
    }
    if (wordness == TextWordness.WholeWord)
    {
        return word_text == target;
    }
    return false;
}

// Replaces runs of spaces with a single space.
private static string CollapseDoubleSpaces(string value)
{
    // built explicitly so the two-space literal cannot be lost to whitespace normalisation;
    // comparing one space with one space would loop forever on any text containing a space
    string double_space = new string(' ', 2);
    while (value.Contains(double_space))
    {
        value = value.Replace(double_space, " ");
    }
    return value;
}
/// <summary>
/// Resolves the verses to search via <c>GetSourceVerses</c> and forwards to the Arabic search
/// when <paramref name="language_type"/> is <c>LanguageType.Arabic</c>, otherwise to the
/// translation search.
/// </summary>
/// <remarks>
/// <paramref name="case_sensitive"/> and <paramref name="translation"/> are only passed to the
/// translation search; <paramref name="with_diacritics"/> and
/// <paramref name="try_emlaaei_if_nothing_found"/> only to the Arabic search.
/// </remarks>
private static List<Phrase> DoFindPhrases(Book book, FindScope find_scope, Selection current_selection, List<Verse> previous_result, string text, LanguageType language_type, string translation, TextLocation text_location, bool case_sensitive, TextWordness wordness, int multiplicity, bool at_word_start, bool with_diacritics, bool try_emlaaei_if_nothing_found)
{
    List<Verse> verses_in_scope = GetSourceVerses(book, find_scope, current_selection, previous_result);

    if (language_type != LanguageType.Arabic)
    {
        // every non-Arabic language type is treated as a translation search
        return DoFindPhrases(translation, verses_in_scope, find_scope, current_selection, previous_result, text, text_location, case_sensitive, wordness, multiplicity, at_word_start);
    }

    return DoFindPhrases(verses_in_scope, find_scope, current_selection, previous_result, text, text_location, wordness, multiplicity, at_word_start, with_diacritics, try_emlaaei_if_nothing_found);
}
/// <summary>
/// Builds the search pattern for <paramref name="text"/>.
/// For the positional locations (Anywhere, AtStart, AtMiddle, AtEnd) the result is a
/// regular expression chosen by <paramref name="wordness"/>. For AllWords / AnyWord the
/// result is the whitespace-normalized text and the three word lists are filled instead.
/// </summary>
/// <param name="text">Text to search for. It is inserted into the pattern as-is (not regex-escaped).</param>
/// <param name="text_location">Where in the line the text must occur, or AllWords / AnyWord for word-list matching.</param>
/// <param name="wordness">Whole word, part of word, or any; other values select the empty-line pattern.</param>
/// <param name="at_word_start">When true, positional patterns are additionally required to begin at a word boundary.</param>
/// <param name="unsigned_words">Words without a '+' or '-' marker (AllWords / AnyWord only; otherwise empty).</param>
/// <param name="positive_words">Words that start or end with '+', marker removed (AllWords / AnyWord only; otherwise empty).</param>
/// <param name="negative_words">Words that start or end with '-', marker removed (AllWords / AnyWord only; otherwise empty).</param>
/// <returns>
/// The pattern; <paramref name="text"/> unchanged when it is null or empty;
/// null when <paramref name="text_location"/> is not one of the handled values.
/// </returns>
private static string BuildPattern(string text, TextLocation text_location, TextWordness wordness, bool at_word_start, out List<string> unsigned_words, out List<string> positive_words, out List<string> negative_words)
{
    unsigned_words = new List<string>();
    positive_words = new List<string>();
    negative_words = new List<string>();

    if (String.IsNullOrEmpty(text)) return text;
    text = text.Trim();

    /*
    =====================================================================
    Regular Expressions (RegEx)
    =====================================================================
    Best Reference: http://www.regular-expressions.info/
    =====================================================================
    NOTE(review): parts of this table (class union/intersection with &&,
    possessive quantifiers) look like Java regex syntax - verify against
    the .NET regular expression reference before relying on them.

    Matches Characters
      x            character x
      \\           backslash character
      \0n          character with octal value 0n (0 <= n <= 7)
      \0nn         character with octal value 0nn (0 <= n <= 7)
      \0mnn        character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)
      \xhh         character with hexadecimal value 0xhh
      \uhhhh       character with hexadecimal value 0xhhhh
      \t           tab character ('\u0009')
      \n           newline (line feed) character ('\u000A')
      \r           carriage-return character ('\u000D')
      \f           form-feed character ('\u000C')
      \a           alert (bell) character ('\u0007')
      \e           escape character ('\u001B')
      \cx          control character corresponding to x

    Character Classes
      [abc]          a, b, or c (simple class)
      [^abc]         any character except a, b, or c (negation)
      [a-zA-Z]       a through z or A through Z, inclusive (range)
      [a-d[m-p]]     a through d, or m through p: [a-dm-p] (union)
      [a-z&&[def]]   d, e, or f (intersection)
      [a-z&&[^bc]]   a through z, except for b and c: [ad-z] (subtraction)
      [a-z&&[^m-p]]  a through z, and not m through p: [a-lq-z] (subtraction)

    Predefined
      .            any character (inc line terminators) except newline
      \d           digit [0-9]
      \D           non-digit [^0-9]
      \s           whitespace character [ \t\n\x0B\f\r]
      \S           non-whitespace character [^\s]
      \w           word character (alphanumeric) [a-zA-Z_0-9]
      \W           non-word character [^\w]

    Boundary Matchers
      ^            beginning of a line (in Multiline)
      $            end of a line (in Multiline)
      \b           word boundary
      \B           non-word boundary
      \A           beginning of the input
      \G           end of the previous match
      \Z           end of the input but for the final terminator, if any
      \z           end of the input

    Greedy quantifiers
      X?           X, once or not at all
      X*           X, zero or more times
      X+           X, one or more times
      X{n}         X, exactly n times
      X{n,}        X, at least n times
      X{n,m}       X, at least n but not more than m times

    Reluctant quantifiers
      X??          X, once or not at all
      X*?          X, zero or more times
      X+?          X, one or more times
      X{n}?        X, exactly n times
      X{n,}?       X, at least n times
      X{n,m}?      X, at least n but not more than m times

    Possessive quantifiers
      X?+          X, once or not at all
      X*+          X, zero or more times
      X++          X, one or more times
      X{n}+        X, exactly n times
      X{n,}+       X, at least n times
      X{n,m}+      X, at least n but not more than m times

    Lookaround
      positive lookahead   (?=text)
      negative lookahead   (?!text)    // eg: not at end of line   (?!$)
      positive lookbehind  (?<=text)
      negative lookbehind  (?<!text)   // eg: not at start of line (?<!^)
    =====================================================================
    */

    // NOTE(review): text is concatenated into the patterns unescaped, so regex
    // metacharacters typed by the user are interpreted as regex syntax.
    string pattern_empty_line = @"^$";
    string pattern_whole_line = "(" + @"^" + text + @"$" + ")";

    string pattern_any_with_prefix = "(" + @"\S+?" + text + ")";
    string pattern_any_with_prefix_and_suffix = "(" + @"\S+?" + text + @"\S+?" + ")";
    string pattern_any_with_suffix = "(" + text + @"\S+?" + ")";

    string pattern_word_with_prefix = "(" + pattern_any_with_prefix + @"\b" + ")";
    string pattern_word_with_prefix_and_suffix = "(" + pattern_any_with_prefix_and_suffix + ")";
    // NOTE(review): the suffix alternative below is not anchored with \b, unlike the
    // prefix alternative - confirm that this asymmetry is intended.
    string pattern_word_with_any_fixes = "(" + pattern_word_with_prefix + "|" + pattern_word_with_prefix_and_suffix + "|" + pattern_any_with_suffix + ")";

    // Whole word
    string pattern_whole_word_at_start = "(" + pattern_whole_line + "|" + @"^" + text + @"\b" + ")";
    string pattern_whole_word_at_middle = "(" + pattern_whole_line + "|" + @"(?<!^)" + @"\b" + text + @"\b" + @"(?!$)" + ")";
    string pattern_whole_word_at_end = "(" + pattern_whole_line + "|" + @"\b" + text + @"$" + ")";
    string pattern_whole_word_anywhere = "(" + pattern_whole_line + "|" + @"\b" + text + @"\b" + ")";

    // Part of word
    string pattern_part_word_at_start = "(" + @"^" + pattern_word_with_any_fixes + ")";
    string pattern_part_word_at_middle = "(" + @"(?<!^)" + pattern_word_with_any_fixes + @"(?!$)" + ")";
    string pattern_part_word_at_end = "(" + pattern_word_with_any_fixes + @"$" + ")";
    string pattern_part_word_anywhere = "(" + pattern_part_word_at_start + "|" + pattern_part_word_at_middle + "|" + pattern_part_word_at_end + ")";

    // Any == Whole word | Part of word
    string pattern_any_at_start = "(" + pattern_whole_line + "|" + @"^" + text + ")";
    string pattern_any_at_middle = "(" + pattern_whole_line + "|" + @"(?<!^)" + text + @"(?!$)" + ")";
    string pattern_any_at_end = "(" + pattern_whole_line + "|" + text + @"$" + ")";
    string pattern_any_anywhere = text;

    string pattern = null;
    switch (text_location)
    {
        case TextLocation.Anywhere:
            pattern = SelectPatternByWordness(wordness, pattern_whole_word_anywhere, pattern_part_word_anywhere, pattern_any_anywhere, pattern_empty_line);
            break;

        case TextLocation.AtStart:
            pattern = SelectPatternByWordness(wordness, pattern_whole_word_at_start, pattern_part_word_at_start, pattern_any_at_start, pattern_empty_line);
            break;

        case TextLocation.AtMiddle:
            pattern = SelectPatternByWordness(wordness, pattern_whole_word_at_middle, pattern_part_word_at_middle, pattern_any_at_middle, pattern_empty_line);
            break;

        case TextLocation.AtEnd:
            pattern = SelectPatternByWordness(wordness, pattern_whole_word_at_end, pattern_part_word_at_end, pattern_any_at_end, pattern_empty_line);
            break;

        case TextLocation.AllWords:
        case TextLocation.AnyWord:
            {
                // Word-list search: no regex is built. Collapse whitespace runs to single
                // spaces, then sort each word into a list by its leading/trailing sign.
                string normalized_text = Regex.Replace(text, @"\s+", " ");
                foreach (string word in normalized_text.Split())
                {
                    if (word.StartsWith("-"))
                    {
                        negative_words.Add(word.Substring(1));
                    }
                    else if (word.EndsWith("-"))
                    {
                        negative_words.Add(word.Substring(0, word.Length - 1));
                    }
                    else if (word.StartsWith("+"))
                    {
                        positive_words.Add(word.Substring(1));
                    }
                    else if (word.EndsWith("+"))
                    {
                        positive_words.Add(word.Substring(0, word.Length - 1));
                    }
                    else
                    {
                        unsigned_words.Add(word);
                    }
                }
                return normalized_text;
            }
    }

    // The word-start lookbehind must wrap the finished pattern. Wrapping before the
    // pattern is selected leaves an empty group in front of it, so alternations in
    // the pattern are only constrained on their first branch.
    if (at_word_start && (pattern != null))
    {
        pattern = @"(?<=\b)(" + pattern + @")"; // positive lookbehind
    }

    return pattern;
}

/// <summary>
/// Returns the candidate pattern that corresponds to <paramref name="wordness"/>,
/// or <paramref name="fallback_pattern"/> for any other value.
/// </summary>
private static string SelectPatternByWordness(TextWordness wordness, string whole_word_pattern, string part_of_word_pattern, string any_pattern, string fallback_pattern)
{
    if (wordness == TextWordness.WholeWord) return whole_word_pattern;
    if (wordness == TextWordness.PartOfWord) return part_of_word_pattern;
    if (wordness == TextWordness.Any) return any_pattern;
    return fallback_pattern;
}
/// <summary>
/// Starts a new search, asks the client to find phrases matching <paramref name="text"/>,
/// builds the result header and displays the found verses.
/// Does nothing when there is no client; only prepares a new search when the text is null or empty.
/// </summary>
/// <param name="text">Text to search for; also echoed in the result header.</param>
/// <param name="language_type">Forwarded to the client's FindPhrases.</param>
/// <param name="translation">Forwarded to the client's FindPhrases.</param>
/// <param name="text_location">Forwarded to the client's FindPhrases; also echoed in the result header.</param>
/// <param name="case_sensitive">Forwarded to the client's FindPhrases.</param>
/// <param name="wordness">Forwarded to the client's FindPhrases.</param>
/// <param name="multiplicity">Forwarded to the client's FindPhrases; 0 selects the "without" form of the header.</param>
/// <param name="at_word_start">Forwarded to the client's FindPhrases.</param>
/// <param name="with_diacritics">Forwarded to the client's FindPhrases.</param>
private void FindByText(string text, LanguageType language_type, string translation, TextLocation text_location, bool case_sensitive, TextWordness wordness, int multiplicity, bool at_word_start, bool with_diacritics)
{
    if (m_client == null) return;

    PrepareNewSearch();

    if (String.IsNullOrEmpty(text)) return;

    // Honour the caller's with_diacritics argument (the field m_with_diacritics was
    // passed here before, which silently ignored the parameter).
    m_client.FindPhrases(text, language_type, translation, text_location, case_sensitive, wordness, multiplicity, at_word_start, with_diacritics);

    if (m_client.FoundPhrases == null) return;
    int phrase_count = m_client.FoundPhrases.Count;

    if (m_client.FoundVerses == null) return;
    int verse_count = m_client.FoundVerses.Count;

    string verses_label = verse_count + ((verse_count == 1) ? " verse" : " verses");
    string search_label = " " + text + " " + text_location.ToString() + " in " + m_client.FindScope.ToString();

    if (multiplicity == 0)
    {
        // multiplicity 0: header reports verses that do not contain the text
        m_find_result_header = verses_label + " without" + search_label;
    }
    else
    {
        m_find_result_header = phrase_count + " matches in " + verses_label + " with" + search_label;
    }

    DisplayFoundVerses(true);
}
// find by text - Exact
/// <summary>
/// Asks the server for the phrases matching the given exact text and search options,
/// stores them in m_found_phrases, and rebuilds m_found_verses as the distinct verses
/// of those phrases in order of first appearance.
/// </summary>
/// <param name="text">Forwarded to Server.FindPhrases.</param>
/// <param name="language_type">Forwarded to Server.FindPhrases.</param>
/// <param name="translation">Forwarded to Server.FindPhrases.</param>
/// <param name="text_location">Forwarded to Server.FindPhrases.</param>
/// <param name="case_sensitive">Forwarded to Server.FindPhrases.</param>
/// <param name="wordness">Forwarded to Server.FindPhrases.</param>
/// <param name="multiplicity">Forwarded to Server.FindPhrases.</param>
/// <param name="at_word_start">Forwarded to Server.FindPhrases.</param>
/// <param name="with_diacritics">Forwarded to Server.FindPhrases.</param>
/// <returns>
/// Number of found phrases, or 0 when the server returns null
/// (in which case the previously found verses are left untouched).
/// </returns>
public int FindPhrases(string text, LanguageType language_type, string translation, TextLocation text_location, bool case_sensitive, TextWordness wordness, int multiplicity, bool at_word_start, bool with_diacritics)
{
    // The current m_found_verses is handed to the server as the previous result.
    m_found_phrases = Server.FindPhrases(m_book, m_find_scope, m_selection, m_found_verses, text, language_type, translation, text_location, case_sensitive, wordness, multiplicity, at_word_start, with_diacritics);

    if (m_found_phrases == null)
    {
        return 0;
    }

    m_found_verses = new List<Verse>();
    foreach (Phrase found_phrase in m_found_phrases)
    {
        if (found_phrase == null)
        {
            continue;
        }

        // several phrases may belong to one verse; list each verse once
        if (m_found_verses.Contains(found_phrase.Verse))
        {
            continue;
        }

        m_found_verses.Add(found_phrase.Verse);
    }

    return m_found_phrases.Count;
}