Beispiel #1
0
        /// <summary>
        /// Scores the two n-gram sets carried by <paramref name="task"/> and writes the resulting pair to its <c>output</c> member.
        /// </summary>
        /// <param name="task">Task supplying both documents and their n-gram sets; its <c>output</c> is assigned by this call.</param>
        protected void ComputeSimilarity(ComputeSimilarityTask task)
        {
            // Structure is scored before content, both with the configured computation method.
            var structureScore = StructureSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);
            var contentScore = ContentSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);

            var pair = new DocumentSimilarityResultPair();
            pair.itemA = task.documentA;
            pair.itemB = task.documentB;
            pair.StructureSimilarity = structureScore;
            pair.ContentSimilarity = contentScore;

            task.output = pair;
        }
Beispiel #2
0
        /// <summary>
        /// Scores a pair of documents using the n-gram sets already stored in <paramref name="result"/>.
        /// </summary>
        /// <param name="documentA">First document; used as the lookup key into <c>result.NGramsByDocuments</c>.</param>
        /// <param name="documentB">Second document; used as the lookup key into <c>result.NGramsByDocuments</c>.</param>
        /// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/></param>
        /// <returns>A pair holding both documents together with their structure and content similarity scores.</returns>
        public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB, DocumentSimilarityResult result)
        {
            // The n-gram sets are read from the result object rather than rebuilt here.
            List<LeafNodeDictionaryEntryNGram> firstNGrams = result.NGramsByDocuments[documentA];
            List<LeafNodeDictionaryEntryNGram> secondNGrams = result.NGramsByDocuments[documentB];

            var structureScore = StructureSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);
            var contentScore = ContentSimilarity.GetSimilarity(firstNGrams, secondNGrams, settings.computationMethod);

            var pair = new DocumentSimilarityResultPair();
            pair.itemA = documentA;
            pair.itemB = documentB;
            pair.StructureSimilarity = structureScore;
            pair.ContentSimilarity = contentScore;

            return pair;
        }
Beispiel #3
0
        /// <summary>
        /// Computes the similarity between two documents. If you have to compare more than two documents, use <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})"/> and <see cref="ComputeSimilarity(HtmlNode, HtmlNode, DocumentSimilarityResult)"/> for better performance.
        /// </summary>
        /// <param name="documentA">The first document.</param>
        /// <param name="documentB">The second document.</param>
        /// <returns>A pair holding both documents together with their content and structure similarity scores.</returns>
        public DocumentSimilarityResultPair ComputeSimilarity(HtmlNode documentA, HtmlNode documentB)
        {
            LeafNodeDictionary leafNodeDictionaryA = new LeafNodeDictionary(documentA);
            LeafNodeDictionary leafNodeDictionaryB = new LeafNodeDictionary(documentB);

            // Build the n-gram sets from each document's leaf-node entries using the configured width and mode.
            List<LeafNodeDictionaryEntryNGram> nGrams_A = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryA.items, settings.nGramWidth, settings.nGramMode);
            List<LeafNodeDictionaryEntryNGram> nGrams_B = setAnalysisTools<LeafNodeDictionaryEntry>.getNGramSet<LeafNodeDictionaryEntryNGram>(leafNodeDictionaryB.items, settings.nGramWidth, settings.nGramMode);

            DocumentSimilarityResultPair output = new DocumentSimilarityResultPair
            {
                itemA = documentA,
                itemB = documentB
            };

            output.ContentSimilarity = ContentSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

            // Structure score must come from the StructureSimilarity calculator, as in the other
            // ComputeSimilarity overloads; it was previously computed with ContentSimilarity by mistake.
            output.StructureSimilarity = StructureSimilarity.GetSimilarity(nGrams_A, nGrams_B, settings.computationMethod);

            return output;
        }
 /// <summary>
 /// Feeds the pair's content and structure scores to the corresponding range trackers, then appends the pair to <c>items</c>.
 /// </summary>
 /// <param name="pair">The similarity result to register.</param>
 public void AddResult(DocumentSimilarityResultPair pair)
 {
     var contentScore = pair.ContentSimilarity;
     ContentSimilarityRange.Learn(contentScore);

     var structureScore = pair.StructureSimilarity;
     StructureSimilarityRange.Learn(structureScore);

     items.Add(pair);
 }