/// <summary>
/// Reads the "SourceArtifacts" and "TargetArtifacts" collections from the workspace,
/// builds a term-document matrix over both, runs the smoothing filter once with the
/// source keys and once with the target keys, and stores the resulting cosine
/// similarities in the workspace under "Similarities".
/// </summary>
public override void Compute()
{
    TLArtifactsCollection sources = (TLArtifactsCollection)Workspace.Load("SourceArtifacts");
    TLArtifactsCollection targets = (TLArtifactsCollection)Workspace.Load("TargetArtifacts");

    TermDocumentMatrix rawMatrix = new TermDocumentMatrix(sources, targets);

    // Smoothing is applied in two passes: first keyed by the source ids, then by the target ids.
    TermDocumentMatrix smoothedBySources = SmoothingFilter.Compute(rawMatrix, sources.Keys);
    TermDocumentMatrix smoothedByBoth = SmoothingFilter.Compute(smoothedBySources, targets.Keys);

    TLSimilarityMatrix similarities = SimilarityUtil.ComputeCosine(smoothedByBoth, sources.Keys, targets.Keys);
    Workspace.Store("Similarities", similarities);
}
/// <summary>
/// Computes cosine similarities between source and target documents using the
/// Vector Space Model with the requested term weighting.
/// </summary>
/// <param name="source">Source artifacts</param>
/// <param name="target">Target artifacts</param>
/// <param name="weight">Weighting scheme to apply before computing cosine similarity</param>
/// <returns>Similarity matrix</returns>
/// <exception cref="NotImplementedException">
/// Thrown when <paramref name="weight"/> is not one of the handled schemes.
/// </exception>
public static TLSimilarityMatrix Compute(TLArtifactsCollection source, TLArtifactsCollection target, VSMWeightEnum weight)
{
    switch (weight)
    {
        case VSMWeightEnum.TFIDF:
        {
            // One matrix over both collections, TF-IDF weighted as a whole.
            TermDocumentMatrix combined = new TermDocumentMatrix(source, target);
            TermDocumentMatrix tfidf = WeightUtil.ComputeTFIDF(combined);
            return SimilarityUtil.ComputeCosine(tfidf, source.Keys, target.Keys);
        }

        case VSMWeightEnum.BooleanQueriesAndTFIDFCorpus:
        {
            // Separate matrices: binary term frequency for the sources, TF-IDF for the targets.
            TermDocumentMatrix queries = WeightUtil.ComputeBinaryTF(new TermDocumentMatrix(source));
            TermDocumentMatrix corpus = WeightUtil.ComputeTFIDF(new TermDocumentMatrix(target));
            return SimilarityUtil.ComputeCosine(queries, corpus);
        }

        case VSMWeightEnum.NoWeight:
        {
            // The unweighted matrix is used directly.
            TermDocumentMatrix unweighted = new TermDocumentMatrix(source, target);
            return SimilarityUtil.ComputeCosine(unweighted, source.Keys, target.Keys);
        }

        default:
            throw new NotImplementedException("Unknown weighting scheme \"" + weight + "\"");
    }
}