/// <summary>
/// Writes a short report on this document pair to <paramref name="output"/>: the file paths of both
/// documents, followed by the content (CS), structure (SS) and overall (OS) similarity scores.
/// </summary>
/// <param name="documentNodeDictionary">Lookup from a document node to its source record; it is indexed with both <c>itemA</c> and <c>itemB</c>, so both keys must be present.</param>
/// <param name="folderWithResults">Not used by this method body.</param>
/// <param name="output">Text renderer that receives the report lines.</param>
public void Publish(Dictionary<HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, ITextRender output)
{
    const string scoreFormat = "F3";

    // Both lookups are done before anything is written, so a failed lookup leaves the output untouched.
    HtmlSourceAndUrl firstSource = documentNodeDictionary[itemA];
    HtmlSourceAndUrl secondSource = documentNodeDictionary[itemB];

    output.AppendLine("A: " + firstSource.filepath);
    output.AppendLine("B: " + secondSource.filepath);

    // The scores are emitted between nextTabLevel()/prevTabLevel() (presumably one indent level deeper).
    output.nextTabLevel();

    output.AppendLine("CS: " + ContentSimilarity.ToString(scoreFormat));
    output.AppendLine("SS: " + StructureSimilarity.ToString(scoreFormat));
    output.AppendLine("OS: " + OverallSimilarity.ToString(scoreFormat));

    output.prevTabLevel();
}
/// <summary>
/// Scores the two n-gram sets carried by <paramref name="task"/> and stores the resulting
/// <see cref="DocumentSimilarityResultPair"/> in <c>task.output</c>.
/// </summary>
/// <param name="task">Work item supplying <c>documentA</c>, <c>documentB</c>, <c>nGrams_A</c> and <c>nGrams_B</c>; its <c>output</c> member is overwritten.</param>
protected void ComputeSimilarity(ComputeSimilarityTask task)
{
    // Structure is scored first, then content; both use the configured computation method.
    var structureScore = StructureSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);
    var contentScore = ContentSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);

    var pair = new DocumentSimilarityResultPair();
    pair.itemA = task.documentA;
    pair.itemB = task.documentB;
    pair.StructureSimilarity = structureScore;
    pair.ContentSimilarity = contentScore;

    task.output = pair;
}
/// <summary>
/// Computes the similarity of two documents whose n-grams are already stored in the inner collections of <paramref name="result"/>.
/// </summary>
/// <param name="documentA">The first document; used as a key into <c>result.NGramsByDocuments</c>.</param>
/// <param name="documentB">The second document; used as a key into <c>result.NGramsByDocuments</c>.</param>
/// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/></param>
/// <returns>A result pair holding both documents and their structure and content similarity scores.</returns>
public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB, DocumentSimilarityResult result)
{
    // The n-gram lists are read from the result object instead of being rebuilt from the documents.
    List<LeafNodeDictionaryEntryNGram> gramsOfA = result.NGramsByDocuments[documentA];
    List<LeafNodeDictionaryEntryNGram> gramsOfB = result.NGramsByDocuments[documentB];

    // Structure is scored first, then content; both use the configured computation method.
    var structureScore = StructureSimilarity.GetSimilarity(gramsOfA, gramsOfB, settings.computationMethod);
    var contentScore = ContentSimilarity.GetSimilarity(gramsOfA, gramsOfB, settings.computationMethod);

    var pair = new DocumentSimilarityResultPair();
    pair.itemA = documentA;
    pair.itemB = documentB;
    pair.StructureSimilarity = structureScore;
    pair.ContentSimilarity = contentScore;

    return pair;
}
/// <summary>
/// Computes the similarity between two documents, building their n-gram sets on the fly.
/// If you have to compare more than two documents, use <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/>
/// and <see cref="ComputeSimilarity(HtmlNode, HtmlNode, DocumentSimilarityResult)"/> for better performance.
/// </summary>
/// <param name="documentA">The first document.</param>
/// <param name="documentB">The second document.</param>
/// <returns>A result pair holding both documents and their content and structure similarity scores.</returns>
public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB)
{
    LeafNodeDictionary leafNodeDictionaryA = new LeafNodeDictionary(documentA);
    LeafNodeDictionary leafNodeDictionaryB = new LeafNodeDictionary(documentB);

    // Build the n-gram sets from each document's leaf entries, using the configured width and mode.
    List<LeafNodeDictionaryEntryNGram> nGrams_A = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryA.items, settings.nGramWidth, settings.nGramMode);
    List<LeafNodeDictionaryEntryNGram> nGrams_B = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryB.items, settings.nGramWidth, settings.nGramMode);

    DocumentSimilarityResultPair output = new DocumentSimilarityResultPair
    {
        itemA = documentA,
        itemB = documentB
    };

    output.ContentSimilarity = ContentSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

    // The structure score comes from the structure scorer, matching the other ComputeSimilarity overloads.
    output.StructureSimilarity = StructureSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

    return output;
}