/// <summary>
/// Writes a short report for this pair to <paramref name="output"/>: the file paths of both
/// documents, then the content, structure and overall similarity scores one tab level deeper.
/// </summary>
/// <param name="documentNodeDictionary">Lookup from a document node to its source record; it is indexed with <c>itemA</c> and <c>itemB</c>.</param>
/// <param name="folderWithResults">Not read by this method.</param>
/// <param name="output">Renderer that receives the report lines.</param>
public void Publish(Dictionary<HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, folderNode folderWithResults, ITextRender output)
        {
            // Every score is printed with three decimal places.
            const string scoreFormat = "F3";

            // Both lookups happen before anything is written to the output.
            var firstSource  = documentNodeDictionary[itemA];
            var secondSource = documentNodeDictionary[itemB];

            output.AppendLine("A: " + firstSource.filepath);
            output.AppendLine("B: " + secondSource.filepath);

            // The scores are nested under the two path lines.
            output.nextTabLevel();
            output.AppendLine("CS: " + ContentSimilarity.ToString(scoreFormat));
            output.AppendLine("SS: " + StructureSimilarity.ToString(scoreFormat));
            output.AppendLine("OS: " + OverallSimilarity.ToString(scoreFormat));
            output.prevTabLevel();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Scores the two n-gram sets carried by <paramref name="task"/> and stores the resulting
        /// pair in <c>task.output</c>.
        /// </summary>
        /// <param name="task">Work item supplying both documents and their n-grams; its <c>output</c> member is overwritten.</param>
        protected void ComputeSimilarity(ComputeSimilarityTask task)
        {
            // Structure is scored first, then content, both with the configured computation method.
            var structureScore = StructureSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);
            var contentScore   = ContentSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);

            DocumentSimilarityResultPair pair = new DocumentSimilarityResultPair();
            pair.itemA = task.documentA;
            pair.itemB = task.documentB;
            pair.StructureSimilarity = structureScore;
            pair.ContentSimilarity   = contentScore;

            task.output = pair;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Scores two documents whose n-grams are already stored in <paramref name="result"/>.
        /// </summary>
        /// <param name="documentA">First document; used as a key into <c>result.NGramsByDocuments</c>.</param>
        /// <param name="documentB">Second document; used as a key into <c>result.NGramsByDocuments</c>.</param>
        /// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/></param>
        /// <returns>A pair holding both documents together with their structure and content similarity scores.</returns>
        public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB, DocumentSimilarityResult result)
        {
            // The n-grams are taken from the result object instead of being rebuilt here.
            List<LeafNodeDictionaryEntryNGram> firstNGrams  = result.NGramsByDocuments[documentA];
            List<LeafNodeDictionaryEntryNGram> secondNGrams = result.NGramsByDocuments[documentB];

            var structureScore = StructureSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);
            var contentScore   = ContentSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);

            DocumentSimilarityResultPair pair = new DocumentSimilarityResultPair();
            pair.itemA = documentA;
            pair.itemB = documentB;
            pair.StructureSimilarity = structureScore;
            pair.ContentSimilarity   = contentScore;

            return pair;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Computes the similarity between two items. If you have to compare more than two documents, use <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/> and <see cref="ComputeSimilarity(HtmlNode, HtmlNode, DocumentSimilarityResult)"/> methods for greater performance
        /// </summary>
        /// <param name="documentA">The document a.</param>
        /// <param name="documentB">The document b.</param>
        /// <returns>A pair holding both documents together with their content and structure similarity scores.</returns>
        public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB)
        {
            // A leaf-node dictionary is built for each document on every call.
            LeafNodeDictionary leafNodeDictionaryA = new LeafNodeDictionary(documentA);
            LeafNodeDictionary leafNodeDictionaryB = new LeafNodeDictionary(documentB);

            // Both n-gram sets are built with the same configured width and mode.
            List<LeafNodeDictionaryEntryNGram> nGrams_A = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryA.items, settings.nGramWidth, settings.nGramMode);
            List<LeafNodeDictionaryEntryNGram> nGrams_B = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryB.items, settings.nGramWidth, settings.nGramMode);

            DocumentSimilarityResultPair output = new DocumentSimilarityResultPair
            {
                itemA = documentA,
                itemB = documentB
            };

            output.ContentSimilarity = ContentSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

            // The structure score comes from the structure scorer, as in the other ComputeSimilarity
            // overloads; it was previously computed with the content scorer, which made both scores equal.
            output.StructureSimilarity = StructureSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

            return output;
        }