/// <summary>
/// Scores the two n-gram sets carried by <paramref name="task"/> for structure and content similarity
/// and stores the resulting pair in the task's <c>output</c> member.
/// </summary>
/// <param name="task">Supplies both documents and their n-grams; receives the computed result.</param>
protected void ComputeSimilarity(ComputeSimilarityTask task)
{
    var firstNGrams = task.nGrams_A;
    var secondNGrams = task.nGrams_B;

    // Structure is scored before content, then both are attached to a freshly built pair.
    var structureScore = StructureSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);
    var contentScore = ContentSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);

    var pair = new DocumentSimilarityResultPair
    {
        itemA = task.documentA,
        itemB = task.documentB
    };
    pair.StructureSimilarity = structureScore;
    pair.ContentSimilarity = contentScore;

    task.output = pair;
}
/// <summary>
/// Computes similarity for two documents whose n-grams are already stored in the given result object.
/// </summary>
/// <param name="documentA">The first document; used as a key into <c>result.NGramsByDocuments</c>.</param>
/// <param name="documentB">The second document; used as a key into <c>result.NGramsByDocuments</c>.</param>
/// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/></param>
/// <returns>A new pair holding both documents and their structure and content similarity scores.</returns>
public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB, DocumentSimilarityResult result)
{
    // The n-grams are looked up rather than rebuilt; both documents are expected to be present in the result.
    List<LeafNodeDictionaryEntryNGram> firstNGrams = result.NGramsByDocuments[documentA];
    List<LeafNodeDictionaryEntryNGram> secondNGrams = result.NGramsByDocuments[documentB];

    var structureScore = StructureSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);
    var contentScore = ContentSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);

    var pair = new DocumentSimilarityResultPair
    {
        itemA = documentA,
        itemB = documentB
    };
    pair.StructureSimilarity = structureScore;
    pair.ContentSimilarity = contentScore;

    return pair;
}
/// <summary>
/// Computes the similarity between two items. If you have to compare more than two documents, use <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/> and <see cref="ComputeSimilarity(HtmlNode, HtmlNode, DocumentSimilarityResult)"/> methods for better performance
/// </summary>
/// <param name="documentA">The document a.</param>
/// <param name="documentB">The document b.</param>
/// <returns>A new pair holding both documents and their content and structure similarity scores.</returns>
public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB)
{
    // Build a leaf-node dictionary for each document, then derive its n-gram set
    // using the width and mode configured in settings.
    LeafNodeDictionary leafNodeDictionaryA = new LeafNodeDictionary(documentA);
    LeafNodeDictionary leafNodeDictionaryB = new LeafNodeDictionary(documentB);

    List<LeafNodeDictionaryEntryNGram> nGrams_A = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryA.items, settings.nGramWidth, settings.nGramMode);
    List<LeafNodeDictionaryEntryNGram> nGrams_B = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryB.items, settings.nGramWidth, settings.nGramMode);

    DocumentSimilarityResultPair output = new DocumentSimilarityResultPair
    {
        itemA = documentA,
        itemB = documentB
    };

    output.ContentSimilarity = ContentSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

    // Fix: the structure score must come from the StructureSimilarity computer. It was
    // previously taken from ContentSimilarity, so both scores were always identical.
    output.StructureSimilarity = StructureSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

    return output;
}
/// <summary>
/// Registers a computed pair: passes its content and structure scores to
/// <c>ContentSimilarityRange.Learn</c> and <c>StructureSimilarityRange.Learn</c>,
/// then appends the pair to <c>items</c>.
/// </summary>
/// <param name="pair">The similarity result to record.</param>
public void AddResult(DocumentSimilarityResultPair pair)
{
    var contentScore = pair.ContentSimilarity;
    ContentSimilarityRange.Learn(contentScore);

    var structureScore = pair.StructureSimilarity;
    StructureSimilarityRange.Learn(structureScore);

    items.Add(pair);
}