/// <summary>
/// Computes the similarity for the result object: builds one task per unordered pair of documents,
/// executes the tasks in chunks (each chunk in parallel) and adds every produced pair result to <paramref name="result" />.
/// </summary>
/// <param name="result">The result object, previously created with <see cref="Prepare(IEnumerable{HtmlNode}, string, List{string})" /></param>
/// <param name="output">The output; receives one progress line per executed task chunk.</param>
/// <param name="documents">Optional: select subset of documents to be analysed. These must be within <paramref name="result" /> inner collections. When <c>isNullOrEmpty()</c> reports true for it, all keys of <c>result.LeafDictionaryByDocuments</c> are used.</param>
/// <returns>
/// The same result object specified in parameters
/// </returns>
public DocumentSimilarityResult ComputeSimilarity(DocumentSimilarityResult result, ITextRender output, List<HtmlNode> documents = null)
{
    if (documents.isNullOrEmpty())
    {
        documents = result.LeafDictionaryByDocuments.Keys.ToList();
    }

    // One task per unordered document pair (i < y), so each pair is compared exactly once.
    List<ComputeSimilarityTask> tasks = new List<ComputeSimilarityTask>();

    for (int i = 0; i < documents.Count - 1; i++)
    {
        // Document A and its n-grams are invariant for the whole inner loop.
        var documentA = documents[i];
        var nGramsA = result.NGramsByDocuments[documentA];

        for (int y = i + 1; y < documents.Count; y++)
        {
            var documentB = documents[y];

            tasks.Add(new ComputeSimilarityTask()
            {
                documentA = documentA,
                documentB = documentB,
                nGrams_A = nGramsA,
                nGrams_B = result.NGramsByDocuments[documentB]
            });
        }
    }

    // Aim for roughly five chunks. With fewer than five tasks the integer division
    // yields 0, which is not a usable chunk size, so fall back to one task per chunk.
    const int targetChunkCount = 5;
    int chunkSize = tasks.Count / targetChunkCount;
    if (chunkSize < 1)
    {
        chunkSize = 1;
    }

    var task_chunks = tasks.SplitBySize(chunkSize);

    // Running position of the current chunk; avoids a linear IndexOf search per iteration.
    int chunkIndex = 0;

    foreach (var task_chunk in task_chunks)
    {
        output.AppendLine("Executing similarity computation task chunk [size:" + task_chunk.Count + "] " + chunkIndex + " of " + task_chunks.Count);
        chunkIndex++;

        // Every task writes only to its own output field during the parallel phase.
        Parallel.ForEach<ComputeSimilarityTask>(task_chunk, x =>
        {
            ComputeSimilarity(x);
        });

        // Results are collected sequentially after the parallel phase has finished
        // (presumably because AddResult is not meant for concurrent calls - confirm).
        foreach (var task in task_chunk)
        {
            if (task.output != null)
            {
                result.AddResult(task.output);
            }
        }
    }

    return result;
}
/// <summary>
/// Executes a single similarity task: scores the structure and the content similarity of the
/// task's two n-gram sets with the configured computation method and stores the resulting
/// pair in <c>task.output</c>.
/// </summary>
/// <param name="task">The task holding both documents and their n-grams; its <c>output</c> field is overwritten.</param>
protected void ComputeSimilarity(ComputeSimilarityTask task)
{
    // Structure is scored first, content second; both use the same inputs and method.
    var structureScore = StructureSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);
    var contentScore = ContentSimilarity.GetSimilarity(task.nGrams_A, task.nGrams_B, settings.computationMethod);

    task.output = new DocumentSimilarityResultPair
    {
        itemA = task.documentA,
        itemB = task.documentB,
        StructureSimilarity = structureScore,
        ContentSimilarity = contentScore
    };
}