/// <summary>
/// Computes the similarity between two vectors with the requested algorithm.
/// </summary>
/// <param name="u">First vector.</param>
/// <param name="v">Second vector.</param>
/// <param name="similarityMethod">
/// Algorithm selector. CosineSimilarity and JaccardCoefficient pick their
/// respective calculators; every other value is evaluated with PearsonCorrelation.
/// </param>
/// <returns>The value returned by the selected calculator's CalculateSimilarity.</returns>
public static double SimilarityTo(this InsightVector u, InsightVector v, SimilarityMethod similarityMethod)
{
    if (similarityMethod == SimilarityMethod.CosineSimilarity)
    {
        return new CosineSimilarity().CalculateSimilarity(u, v);
    }

    if (similarityMethod == SimilarityMethod.JaccardCoefficient)
    {
        return new JaccardCoefficient().CalculateSimilarity(u, v);
    }

    // Anything not handled above falls back to Pearson correlation.
    return new PearsonCorrelation().CalculateSimilarity(u, v);
}
// find by similarity - verses similar to a given verse
/// <summary>
/// Public entry point for the similarity search; forwards all arguments
/// unchanged to the Book-based DoFindVerses overload.
/// </summary>
/// <param name="book">Book passed on to the search.</param>
/// <param name="find_scope">Search scope passed on to the search.</param>
/// <param name="current_selection">Current selection passed on to the search.</param>
/// <param name="previous_result">Previous search result passed on to the search.</param>
/// <param name="verse">Verse the candidates are compared against.</param>
/// <param name="similarity_method">Similarity test to apply.</param>
/// <param name="similarity_percentage">Similarity threshold handed to the similarity test.</param>
/// <returns>Whatever list DoFindVerses produces.</returns>
public static List<Verse> FindVerses(
    Book book,
    FindScope find_scope,
    Selection current_selection,
    List<Verse> previous_result,
    Verse verse,
    SimilarityMethod similarity_method,
    double similarity_percentage)
{
    List<Verse> found_verses = DoFindVerses(
        book,
        find_scope,
        current_selection,
        previous_result,
        verse,
        similarity_method,
        similarity_percentage);

    return found_verses;
}
/// <summary>
/// Collects the verses in <paramref name="source"/> whose text passes the selected
/// similarity test against the text of <paramref name="verse"/>.
/// </summary>
/// <param name="source">Candidate verses to test; null or empty yields an empty result.</param>
/// <param name="find_scope">Not used by this overload; kept for signature parity with the Book overload.</param>
/// <param name="current_selection">Not used by this overload.</param>
/// <param name="previous_result">Not used by this overload.</param>
/// <param name="verse">Reference verse; null yields an empty result.</param>
/// <param name="find_similarity_method">
/// Similarity test to apply. Supported: SimilarText, SimilarWords, SimilarFirstHalf,
/// SimilarLastHalf, SimilarFirstWord, SimilarLastWord. Any other value yields an empty result.
/// </param>
/// <param name="similarity_percentage">Threshold handed to the similarity test.</param>
/// <returns>A new list with the matching verses, in source order. Never null.</returns>
private static List<Verse> DoFindVerses(List<Verse> source, FindScope find_scope, Selection current_selection, List<Verse> previous_result, Verse verse, SimilarityMethod find_similarity_method, double similarity_percentage)
{
    List<Verse> result = new List<Verse>();
    if ((source == null) || (source.Count == 0) || (verse == null))
    {
        return result;
    }

    for (int j = 0; j < source.Count; j++)
    {
        Verse candidate = source[j];
        bool is_similar;

        switch (find_similarity_method)
        {
            case SimilarityMethod.SimilarText:
                is_similar = verse.Text.IsSimilarTo(candidate.Text, similarity_percentage);
                break;

            case SimilarityMethod.SimilarWords:
                {
                    // The word-count argument is the shorter verse's word count scaled by the percentage.
                    int scaled_word_count = (int)Math.Round(Math.Min(verse.Words.Count, candidate.Words.Count) * similarity_percentage);
                    is_similar = verse.Text.HasSimilarWordsTo(candidate.Text, scaled_word_count, 1.0);
                }
                break;

            case SimilarityMethod.SimilarFirstHalf:
                is_similar = verse.Text.HasSimilarFirstHalfTo(candidate.Text, similarity_percentage);
                break;

            case SimilarityMethod.SimilarLastHalf:
                is_similar = verse.Text.HasSimilarLastHalfTo(candidate.Text, similarity_percentage);
                break;

            case SimilarityMethod.SimilarFirstWord:
                is_similar = verse.Text.HasSimilarFirstWordTo(candidate.Text, similarity_percentage);
                break;

            case SimilarityMethod.SimilarLastWord:
                is_similar = verse.Text.HasSimilarLastWordTo(candidate.Text, similarity_percentage);
                break;

            default:
                // Unsupported method: no candidate can match, so stop scanning.
                return result;
        }

        if (is_similar)
        {
            result.Add(candidate);
        }
    }

    return result;
}
/// <summary>
/// Resolves the candidate verses through GetSourceVerses and then runs the
/// list-based DoFindVerses overload on them.
/// </summary>
/// <param name="book">Book handed to GetSourceVerses.</param>
/// <param name="find_scope">Scope handed to GetSourceVerses and forwarded to the list overload.</param>
/// <param name="current_selection">Selection handed to GetSourceVerses and forwarded to the list overload.</param>
/// <param name="previous_result">Previous result handed to GetSourceVerses and forwarded to the list overload.</param>
/// <param name="verse">Verse the candidates are compared against.</param>
/// <param name="similarity_method">Similarity test to apply.</param>
/// <param name="similarity_percentage">Threshold handed to the similarity test.</param>
/// <returns>The list produced by the list-based overload.</returns>
private static List<Verse> DoFindVerses(
    Book book,
    FindScope find_scope,
    Selection current_selection,
    List<Verse> previous_result,
    Verse verse,
    SimilarityMethod similarity_method,
    double similarity_percentage)
{
    // Step 1: work out which verses are searched.
    List<Verse> candidates = GetSourceVerses(book, find_scope, current_selection, previous_result);

    // Step 2: filter them by similarity to the given verse.
    return DoFindVerses(
        candidates,
        find_scope,
        current_selection,
        previous_result,
        verse,
        similarity_method,
        similarity_percentage);
}
/// <summary>
/// Groups the verses in <paramref name="source"/> into ranges of similar verses.
/// Each verse is compared with every later verse in the list; the first verse of a
/// matching pair becomes the head of a range and later matches are appended to it.
/// </summary>
/// <param name="source">Verses to compare with each other; null or empty yields an empty result.</param>
/// <param name="find_scope">Not used by this method.</param>
/// <param name="current_selection">Not used by this method.</param>
/// <param name="previous_result">Not used by this method.</param>
/// <param name="find_similarity_method">
/// Similarity test to apply. Supported: SimilarText, SimilarWords, SimilarFirstWord,
/// SimilarLastWord. Any other value yields an empty result.
/// </param>
/// <param name="similarity_percentage">Threshold handed to the similarity test.</param>
/// <returns>
/// One list per range; each list starts with the head verse followed by the verses
/// found similar to it. Never null.
/// </returns>
private static List<List<Verse>> DoFindVerseRanges(List<Verse> source, FindScope find_scope, Selection current_selection, List<Verse> previous_result, SimilarityMethod find_similarity_method, double similarity_percentage)
{
    List<List<Verse>> result = new List<List<Verse>>();
    if ((source == null) || (source.Count == 0))
    {
        return result;
    }

    // Bail out before the pairwise scan when the method is not supported here.
    switch (find_similarity_method)
    {
        case SimilarityMethod.SimilarText:
        case SimilarityMethod.SimilarWords:
        case SimilarityMethod.SimilarFirstWord:
        case SimilarityMethod.SimilarLastWord:
            break;
        default:
            return result;
    }

    // Keyed by the head verse of each range so later matches can be appended to it.
    Dictionary<Verse, List<Verse>> verse_ranges = new Dictionary<Verse, List<Verse>>();

    // Indexed by position in source: true once that verse has been appended to some range,
    // so it is not appended to a later one.
    // NOTE(review): the outer loop does not consult this flag, so a verse already placed in a
    // range can still head its own range later - confirm this is intended.
    bool[] already_compared = new bool[source.Count];

    for (int i = 0; i < source.Count - 1; i++)
    {
        for (int j = i + 1; j < source.Count; j++)
        {
            if (already_compared[j])
            {
                continue;
            }

            if (!AreVersesSimilar(source[i], source[j], find_similarity_method, similarity_percentage))
            {
                continue;
            }

            List<Verse> similar_verses;
            if (!verse_ranges.TryGetValue(source[i], out similar_verses))
            {
                // First match for this head verse: start its range with the head itself.
                similar_verses = new List<Verse>();
                verse_ranges.Add(source[i], similar_verses);
                similar_verses.Add(source[i]);
            }

            similar_verses.Add(source[j]);
            already_compared[j] = true;
        }
    }

    // Copy the ranges out of the dictionary into the result list.
    result.AddRange(verse_ranges.Values);
    return result;
}

// Applies the similarity test selected by find_similarity_method to a pair of verses.
// Returns false for methods that DoFindVerseRanges does not support.
private static bool AreVersesSimilar(Verse first, Verse second, SimilarityMethod find_similarity_method, double similarity_percentage)
{
    switch (find_similarity_method)
    {
        case SimilarityMethod.SimilarText:
            return first.Text.IsSimilarTo(second.Text, similarity_percentage);

        case SimilarityMethod.SimilarWords:
            {
                // The word-count argument is the shorter verse's word count scaled by the percentage.
                int scaled_word_count = (int)Math.Round(Math.Min(first.Words.Count, second.Words.Count) * similarity_percentage);
                return first.Text.HasSimilarWordsTo(second.Text, scaled_word_count, 1.0);
            }

        case SimilarityMethod.SimilarFirstWord:
            // Must compare the two different verses; testing a verse against itself
            // would not say anything about the pair.
            return first.Text.HasSimilarFirstWordTo(second.Text, similarity_percentage);

        case SimilarityMethod.SimilarLastWord:
            return first.Text.HasSimilarLastWordTo(second.Text, similarity_percentage);

        default:
            return false;
    }
}
// find by similarity - verses similar to a given verse
/// <summary>
/// Runs Server.FindVerses for the given verse using this instance's book, find scope,
/// selection and previously found verses, and stores the outcome in m_found_verses.
/// </summary>
/// <param name="verse">Verse the candidates are compared against.</param>
/// <param name="similarity_method">Similarity test to apply.</param>
/// <param name="similarity_percentage">Threshold handed to the similarity test.</param>
/// <returns>Number of found verses, or 0 when the server returns null.</returns>
public int FindVerses(Verse verse, SimilarityMethod similarity_method, double similarity_percentage)
{
    m_found_verses = Server.FindVerses(
        m_book,
        m_find_scope,
        m_selection,
        m_found_verses,
        verse,
        similarity_method,
        similarity_percentage);

    return (m_found_verses == null) ? 0 : m_found_verses.Count;
}
// find by similarity - all verses similar to each other
/// <summary>
/// Runs Server.FindVerseRanges using this instance's book, find scope, selection and
/// previously found verses. The ranges are stored in m_found_verse_ranges and, when the
/// server returns a non-null result, m_found_verses is rebuilt as all ranges concatenated.
/// </summary>
/// <param name="similarity_method">Similarity test to apply.</param>
/// <param name="similarity_percentage">Threshold handed to the similarity test.</param>
/// <returns>Number of found verse ranges, or 0 when the server returns null.</returns>
public int FindVerseRanges(SimilarityMethod similarity_method, double similarity_percentage)
{
    m_found_verse_ranges = Server.FindVerseRanges(
        m_book,
        m_find_scope,
        m_selection,
        m_found_verses,
        similarity_method,
        similarity_percentage);

    if (m_found_verse_ranges == null)
    {
        // m_found_verses is deliberately left as it was in this case.
        return 0;
    }

    // Flatten the ranges into a single list of found verses.
    m_found_verses = new List<Verse>();
    foreach (List<Verse> range in m_found_verse_ranges)
    {
        m_found_verses.AddRange(range);
    }

    return m_found_verse_ranges.Count;
}