Example #1
0
        /// <summary>
        /// Computes the similarity for every unordered pair of documents and stores the outcomes in the result object.
        /// </summary>
        /// <remarks>
        /// One task is built per document pair. The tasks are split into chunks (roughly a fifth of the
        /// tasks each, never fewer than one task per chunk); the tasks of a chunk are evaluated with
        /// <c>Parallel.ForEach</c>, and their outputs are added to <paramref name="result" /> on the calling
        /// thread once the chunk has finished.
        /// </remarks>
        /// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})" /></param>
        /// <param name="output">Receives one progress line per chunk. May be <c>null</c>, in which case no progress is reported.</param>
        /// <param name="documents">Optional: select subset of documents to be analysed. These must be within the inner collections of <paramref name="result" />. When null or empty, all keys of <c>result.LeafDictionaryByDocuments</c> are used.</param>
        /// <returns>
        /// The same result object specified in parameters
        /// </returns>
        public DocumentSimilarityResult ComputeSimilarity(DocumentSimilarityResult result, ITextRender output, List <HtmlNode> documents = null)
        {
            if (documents.isNullOrEmpty())
            {
                documents = result.LeafDictionaryByDocuments.Keys.ToList();
            }

            // Pair every document with each document that follows it (i < y): no pair is
            // scored twice and no document is compared with itself.
            List <ComputeSimilarityTask> tasks = new List <ComputeSimilarityTask>();

            for (int i = 0; i < documents.Count - 1; i++)
            {
                // Loop-invariant for the inner loop, so looked up once per outer iteration.
                var documentA = documents[i];
                var nGramsA   = result.NGramsByDocuments[documentA];

                for (int y = i + 1; y < documents.Count; y++)
                {
                    var documentB = documents[y];

                    tasks.Add(new ComputeSimilarityTask()
                    {
                        documentA = documentA,
                        documentB = documentB,
                        nGrams_A  = nGramsA,
                        nGrams_B  = result.NGramsByDocuments[documentB]
                    });
                }
            }

            // Fewer than two documents: there is nothing to compare.
            if (tasks.Count == 0)
            {
                return(result);
            }

            // Integer division yields 0 when there are fewer tasks than target chunks;
            // a chunk must hold at least one task, so the size is clamped to 1.
            const int targetChunkCount = 5;
            int chunkSize = tasks.Count / targetChunkCount;
            if (chunkSize < 1)
            {
                chunkSize = 1;
            }

            var task_chunks = tasks.SplitBySize(chunkSize);

            int chunkIndex = 0;

            foreach (var task_chunk in task_chunks)
            {
                if (output != null)
                {
                    output.AppendLine("Executing similarity computation task chunk [size:" + task_chunk.Count + "] " + chunkIndex + " of " + task_chunks.Count);
                }
                chunkIndex++;

                Parallel.ForEach <ComputeSimilarityTask>(task_chunk, x =>
                {
                    ComputeSimilarity(x);
                });

                // Parallel.ForEach has returned, so every task of this chunk is done;
                // outputs are collected sequentially here rather than from the worker threads.
                foreach (var task in task_chunk)
                {
                    if (task.output != null)
                    {
                        result.AddResult(task.output);
                    }
                }
            }

            return(result);
        }
Example #2
0
        /// <summary>
        /// Scores the document pair described by the task and stores the outcome in <c>task.output</c>.
        /// </summary>
        /// <remarks>
        /// Both the structure score and the content score are obtained from the task's two n-gram sets,
        /// using the computation method configured in <c>settings</c>.
        /// </remarks>
        /// <param name="task">The task holding the two documents and their n-grams; its <c>output</c> is overwritten.</param>
        protected void ComputeSimilarity(ComputeSimilarityTask task)
        {
            // Both scores are computed before the pair object is created.
            var structureScore = StructureSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);
            var contentScore   = ContentSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);

            DocumentSimilarityResultPair pair = new DocumentSimilarityResultPair();
            pair.itemA = task.documentA;
            pair.itemB = task.documentB;
            pair.StructureSimilarity = structureScore;
            pair.ContentSimilarity   = contentScore;

            task.output = pair;
        }