Example #1
0
        /// <summary>
        /// Loads the "SourceArtifacts" and "TargetArtifacts" collections from the workspace,
        /// builds a term-document matrix over both, runs the smoothing filter once with the
        /// source keys and once with the target keys, computes cosine similarities between
        /// the source and target keys, and stores the result under "Similarities".
        /// </summary>
        public override void Compute()
        {
            var sources = (TLArtifactsCollection)Workspace.Load("SourceArtifacts");
            var targets = (TLArtifactsCollection)Workspace.Load("TargetArtifacts");

            // One named local per pipeline stage; the smoothing passes run in the
            // same order as before (source keys first, then target keys).
            TermDocumentMatrix rawMatrix        = new TermDocumentMatrix(sources, targets);
            TermDocumentMatrix sourceSmoothed   = SmoothingFilter.Compute(rawMatrix, sources.Keys);
            TermDocumentMatrix smoothedMatrix   = SmoothingFilter.Compute(sourceSmoothed, targets.Keys);

            TLSimilarityMatrix similarities = SimilarityUtil.ComputeCosine(smoothedMatrix, sources.Keys, targets.Keys);
            Workspace.Store("Similarities", similarities);
        }
Example #2
0
        /// <summary>
        /// Builds a cosine similarity matrix for the given artifacts, applying the
        /// term weighting selected by <paramref name="weight"/> before comparison.
        /// </summary>
        /// <param name="source">Source artifacts</param>
        /// <param name="target">Target artifacts</param>
        /// <param name="weight">Weighting scheme to apply to the term-document matrices</param>
        /// <returns>Similarity matrix produced by the cosine computation</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when <paramref name="weight"/> is not one of the handled schemes.
        /// </exception>
        public static TLSimilarityMatrix Compute(TLArtifactsCollection source, TLArtifactsCollection target, VSMWeightEnum weight)
        {
            if (weight == VSMWeightEnum.TFIDF)
            {
                // Single combined matrix, TF-IDF weighted.
                var tfidfMatrix = WeightUtil.ComputeTFIDF(new TermDocumentMatrix(source, target));
                return SimilarityUtil.ComputeCosine(tfidfMatrix, source.Keys, target.Keys);
            }

            if (weight == VSMWeightEnum.BooleanQueriesAndTFIDFCorpus)
            {
                // Separate matrices: binary TF for the source side, TF-IDF for the target side.
                var binaryQueries = WeightUtil.ComputeBinaryTF(new TermDocumentMatrix(source));
                var tfidfCorpus   = WeightUtil.ComputeTFIDF(new TermDocumentMatrix(target));
                return SimilarityUtil.ComputeCosine(binaryQueries, tfidfCorpus);
            }

            if (weight == VSMWeightEnum.NoWeight)
            {
                // Combined matrix passed through without any weighting step.
                var unweightedMatrix = new TermDocumentMatrix(source, target);
                return SimilarityUtil.ComputeCosine(unweightedMatrix, source.Keys, target.Keys);
            }

            throw new NotImplementedException("Unknown weighting scheme \"" + weight + "\"");
        }