public void RunBeforeEachTest()
        {
            // Oracle: three links from source "A" to the starred targets, each with score 1.
            oracle = new TLSimilarityMatrix();
            foreach (string starredTarget in new string[] { "B*", "C*", "D*" })
            {
                oracle.AddLink("A", starredTarget, 1);
            }

            /* Candidate links are inserted out of score order on purpose.
             * Listed by descending score they are:
             *   B*(10), E(9), F(8), C*(7), G(6), H(5), I(4), J(3), D*(2), K(1)
             */
            string[] targetIds = { "G", "K", "B*", "E", "J", "F", "C*", "H", "D*", "I" };
            double[] scores    = { 6, 1, 10, 9, 3, 8, 7, 5, 2, 4 };

            sims = new TLSimilarityMatrix();
            for (int idx = 0; idx < targetIds.Length; idx++)
            {
                sims.AddLink("A", targetIds[idx], scores[idx]);
            }
        }
Example #2
0
        /// <summary>
        /// Computes the cosine similarity between the given document pairs in the matrix.
        /// A pair whose vector-length product is zero receives a score of 0.0.
        /// </summary>
        /// <param name="matrix">Term-by-document matrix</param>
        /// <param name="sourceIDs">Collection of source artifacts ids</param>
        /// <param name="targetIDs">Collection of target artifacts ids (enumerated once per source id)</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix matrix, IEnumerable<string> sourceIDs, IEnumerable<string> targetIDs)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (string sourceID in sourceIDs)
            {
                double[] sourceDoc = matrix.GetDocument(sourceID);
                // The source vector's length does not depend on the target,
                // so compute it once per source instead of once per pair.
                double sourceLength = ComputeLength(sourceDoc);

                foreach (string targetID in targetIDs)
                {
                    double[] targetDoc     = matrix.GetDocument(targetID);
                    double   lengthProduct = sourceLength * ComputeLength(targetDoc);

                    // Avoid dividing by zero: such pairs are scored 0.0.
                    double score = (lengthProduct == 0.0)
                        ? 0.0
                        : ComputeDotProduct(sourceDoc, targetDoc) / lengthProduct;
                    sims.AddLink(sourceID, targetID, score);
                }
            }
            return sims;
        }
Example #3
0
        /// <summary>
        /// Imports a file in the form (each line):
        /// SOURCE TARGET SCORE
        /// Blank lines are skipped.
        /// </summary>
        /// <param name="filename">Similarities file</param>
        /// <returns>Similarity matrix</returns>
        /// <exception cref="InvalidDataException">
        /// Processing a non-blank line raised an IndexOutOfRangeException
        /// (e.g. fewer than three whitespace-separated fields).
        /// </exception>
        public static TLSimilarityMatrix Import(String filename)
        {
            TLSimilarityMatrix answer = new TLSimilarityMatrix();

            // The using block closes the reader on every exit path, including exceptions
            // that are not translated below (e.g. a FormatException from Convert.ToDouble).
            using (StreamReader file = new StreamReader(filename))
            {
                String line;
                int    num = 0; // 1-based line counter, used only for the error message

                while ((line = file.ReadLine()) != null)
                {
                    num++;
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    try
                    {
                        String[] artifacts = line.Split();
                        String   source    = artifacts[0];
                        String   target    = artifacts[1];
                        double   score     = Convert.ToDouble(artifacts[2]);
                        answer.AddLink(source, target, score);
                    }
                    catch (IndexOutOfRangeException e)
                    {
                        throw new InvalidDataException("Invalid data format on line " + num + " of file:" + filename, e);
                    }
                }
            }
            return answer;
        }
Example #4
0
        /// <summary>
        /// Imports script results: reads the whole output file, splits it into
        /// comma-separated similarity values and pairs them with the source and
        /// target document maps.
        /// </summary>
        /// <param name="result">RScriptResults object (not read by this method)</param>
        /// <returns>Script results as a TLSimilarityMatrix</returns>
        /// <exception cref="RDataException">
        /// The number of parsed values differs from (source count * target count).
        /// </exception>
        public override object ImportResults(RScriptResult result)
        {
            string rawdata;

            // using guarantees the file is released even if ReadToEnd throws.
            using (TextReader rfile = new StreamReader(_outputFile))
            {
                rawdata = rfile.ReadToEnd();
            }

            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            // Drop the first two characters and every ')' before splitting on commas.
            // NOTE(review): presumably the file holds an R vector literal such as "c(...)" - confirm.
            string[] sims = rawdata.Remove(0, 2).Replace(")", String.Empty).Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

            int sourceCount = _source.DocMap.Count;
            int targetCount = _target.DocMap.Count;

            if (sims.Length != sourceCount * targetCount)
            {
                throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + (sourceCount * targetCount));
            }

            // Values are consumed source by source: all targets of source 0 first,
            // then all targets of source 1, and so on.
            int src = 0;
            int tgt = 0;

            foreach (string sim in sims)
            {
                matrix.AddLink(_source.DocMap[src], _target.DocMap[tgt], Convert.ToDouble(sim.Trim()));
                tgt++;
                if (tgt == targetCount)
                {
                    tgt = 0;
                    src++;
                }
            }
            return matrix;
        }
Example #5
0
        public void SimilarityMatrixRawSerializationTest()
        {
            // Ten links (source[i] -> target[i]) scored 0..9.
            string[] sourceIds = { "source1", "source2", "source3", "source4", "source5", "source6", "source7", "source8", "source9", "source10" };
            string[] targetIds = { "target1", "target2", "target3", "target4", "target5", "target6", "target7", "target8", "target9", "target10" };

            TLSimilarityMatrix written = new TLSimilarityMatrix();
            int index = 0;
            foreach (string sourceId in sourceIds)
            {
                written.AddLink(sourceId, targetIds[index], (double)index);
                index++;
            }

            // Writer and reader share one in-memory stream.
            MemoryStream buffer = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(buffer);
            BinaryReader reader = new BinaryReader(buffer);

            written.WriteData(writer);

            // Rewind so the reader starts at the first written byte.
            buffer.Position = 0;

            TLSimilarityMatrix restored = new TLSimilarityMatrix();
            restored.ReadData(reader);

            Assert.AreEqual(written.Count, restored.Count);

            // Every source id written must be present after the round trip.
            StringHashSet expectedSources = written.SourceArtifactsIds;
            StringHashSet actualSources   = restored.SourceArtifactsIds;
            foreach (string expected in expectedSources)
            {
                Assert.IsTrue(actualSources.Contains(expected));
            }
        }
Example #6
0
        /// <summary>
        /// Runs the text of every source artifact as a query against the dictionary index
        /// and records each returned result as a link (source id, result artifact id, ranking).
        /// </summary>
        /// <param name="sourceArtifacts">Artifacts whose text is used as the query</param>
        /// <param name="dict">Dictionary index that is searched</param>
        /// <param name="config">Supplies the similarity metric handed to the searcher</param>
        /// <returns>Similarity matrix holding one link per search result</returns>
        /// <exception cref="ComponentException">sourceArtifacts or dict is null</exception>
        private static TLSimilarityMatrix Process(TLArtifactsCollection sourceArtifacts, TLDictionaryIndex dict, TracerConfig config)
        {
            if (sourceArtifacts == null)
            {
                throw new ComponentException("Received null sourceArtifacts");
            }
            if (dict == null)
            {
                throw new ComponentException("Received null dictionaryIndex");
            }

            TLSimilarityMatrix linkMatrix = new TLSimilarityMatrix();
            Searcher           engine     = new Searcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

            foreach (TLArtifact artifact in sourceArtifacts.Values)
            {
                // One search per source artifact; its text is the query.
                List<Result> hits = engine.search(artifact.Text, dict);

                foreach (Result hit in hits)
                {
                    linkMatrix.AddLink(artifact.Id, hit.ArtifactId, hit.Ranking);
                }
            }

            return linkMatrix;
        }
Example #7
0
        /// <summary>
        /// Computes cosine similarities between two TermDocumentMatrices.
        /// Cosine similarity is defined as (dot product) / (length * length);
        /// a pair whose length product is zero is scored 0.0.
        /// The links of each source document are sorted with TLLinksList.Sort()
        /// before being added to the result.
        /// </summary>
        /// <param name="m1">Binary document matrix</param>
        /// <param name="m2">tf-idf weighted document matrix</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
        {
            TLSimilarityMatrix       sims     = new TLSimilarityMatrix();
            List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);

            for (int i = 0; i < m1.NumDocs; i++)
            {
                // Everything that depends only on the source document is fetched once here
                // rather than once per (source, target) pair.
                var    sourceDoc    = matrices[0].GetDocument(i);
                var    sourceName   = m1.GetDocumentName(i);
                double sourceLength = ComputeLength(sourceDoc);

                TLLinksList links = new TLLinksList();
                for (int j = 0; j < m2.NumDocs; j++)
                {
                    var    targetDoc     = matrices[1].GetDocument(j);
                    double lengthProduct = sourceLength * ComputeLength(targetDoc);

                    // Avoid dividing by zero: such pairs are scored 0.0.
                    double score = (lengthProduct == 0.0)
                        ? 0.0
                        : ComputeDotProduct(sourceDoc, targetDoc) / lengthProduct;
                    links.Add(new TLSingleLink(sourceName, m2.GetDocumentName(j), score));
                }

                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return sims;
        }
Example #8
0
        /// <summary>
        /// Import script results. The first line of the output file lists the source ids;
        /// each following non-blank line starts with a target id followed by one
        /// similarity per source. Ids are 1-based indices into the map file.
        /// </summary>
        /// <param name="result">RScriptResults object (not read by this method)</param>
        /// <returns>Script results as a TLSimilarityMatrix</returns>
        public override object ImportResults(RScriptResult result)
        {
            // index = id - 1
            string[]           ids    = Generics.ImportStrings(_mapFile);
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            // using releases the file even when a line fails to parse.
            using (TextReader resultsMatrix = new StreamReader(_outputFile))
            {
                // NOTE(review): for an empty file ReadLine() returns null and Split() throws
                // NullReferenceException - confirm whether that case needs a dedicated error.
                string[] sources = resultsMatrix.ReadLine().Split();
                string   line;

                while ((line = resultsMatrix.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    // [0] target id, [x+] source sims index = x - 1
                    string[] entries = line.Split();
                    string   entry   = ids[Convert.ToInt32(entries[0]) - 1];
                    for (int i = 0; i < sources.Length; i++)
                    {
                        matrix.AddLink(ids[Convert.ToInt32(sources[i]) - 1], entry, Convert.ToDouble(entries[i + 1]));
                    }
                }
            }
            return matrix;
        }
Example #9
0
        /// <summary>
        /// Creates a new similarity matrix and adds every link of the given list to it via AddLink.
        /// </summary>
        /// <param name="list">Links to copy</param>
        /// <returns>The newly created matrix</returns>
        public static TLSimilarityMatrix CreateMatrix(TLLinksList list)
        {
            TLSimilarityMatrix result = new TLSimilarityMatrix();

            foreach (TLSingleLink entry in list)
            {
                result.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
            }

            return result;
        }
Example #10
0
 /// <summary>
 /// Copies into targetMatrix every link of sourceMatrix whose source is the given feature
 /// and whose target id, with the text from its first '(' through its last ')' removed,
 /// is a key of executedMethods.
 /// </summary>
 /// <remarks>this takes a long time</remarks>
 /// <param name="sourceMatrix">Matrix whose links are examined</param>
 /// <param name="targetMatrix">Matrix that receives the matching links</param>
 /// <param name="feature">Source artifact id to keep</param>
 /// <param name="executedMethods">Lookup of executed method names (only the keys are consulted)</param>
 public static void RemoveNonExecutedMethods(ref TLSimilarityMatrix sourceMatrix, ref TLSimilarityMatrix targetMatrix, String feature, Dictionary<string, int> executedMethods)
 {
     foreach (TLSingleLink candidate in sourceMatrix.AllLinks)
     {
         // Links belonging to other features are ignored without touching the regex.
         if (candidate.SourceArtifactId != feature)
         {
             continue;
         }

         string methodName = Regex.Replace(candidate.TargetArtifactId, "(\\(.*\\))", "");
         if (executedMethods.ContainsKey(methodName))
         {
             targetMatrix.AddLink(candidate.SourceArtifactId, candidate.TargetArtifactId, candidate.Score);
         }
     }
 }
Example #11
0
        /// <summary>
        /// Loads "Matrix1" and "Matrix2" from the workspace, adds every link of the second
        /// matrix to the first via AddLink, and stores the first matrix as "Merged".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix merged   = (TLSimilarityMatrix)Workspace.Load("Matrix1");
            TLSimilarityMatrix incoming = (TLSimilarityMatrix)Workspace.Load("Matrix2");

            TLLinksList incomingLinks = incoming.AllLinks;
            foreach (TLSingleLink extra in incomingLinks)
            {
                merged.AddLink(extra.SourceArtifactId, extra.TargetArtifactId, extra.Score);
            }

            Workspace.Store("Merged", merged);
        }
Example #12
0
        /// <summary>
        /// Normalizes a similarity matrix: every score s becomes (s - mean) / standard deviation,
        /// with both statistics obtained from TLSimilarityMatrixUtil for the input matrix.
        /// </summary>
        /// <remarks>
        /// No guard is applied to the divisor; a standard deviation of zero produces
        /// NaN or infinite scores under floating-point division.
        /// </remarks>
        /// <param name="matrix">Similarity matrix</param>
        /// <returns>Normalized similarity matrix (a new instance)</returns>
        public static TLSimilarityMatrix Normalize(TLSimilarityMatrix matrix)
        {
            double average   = TLSimilarityMatrixUtil.AverageSimilarity(matrix);
            double deviation = TLSimilarityMatrixUtil.SimilarityStandardDeviation(matrix);

            TLSimilarityMatrix normalized = new TLSimilarityMatrix();
            foreach (TLSingleLink original in matrix.AllLinks)
            {
                double standardized = (original.Score - average) / deviation;
                normalized.AddLink(original.SourceArtifactId, original.TargetArtifactId, standardized);
            }

            return normalized;
        }
Example #13
0
        /// <summary>
        /// Builds a new similarity matrix in which, for every link (source, target) of
        /// <paramref name="matrix"/>, the scores of (source, related) are increased for each
        /// artifact that <paramref name="relationships"/> reports as related to target.
        /// Each increase adds the current score times a factor: the computed delta when
        /// UseDelta is defined at compile time, otherwise the constant 0.1.
        /// </summary>
        /// <param name="matrix">Similarity matrix whose scores are adjusted (read through AllLinks)</param>
        /// <param name="relationships">Matrix queried for the artifacts related to each link target</param>
        /// <returns>New matrix built from the adjusted scores, added in sorted order</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
        {
            // create pseudo matrix for easy lookup
            // Dictionary<sourceID, Dictionary<targetID, score>>
            Dictionary <string, Dictionary <string, double> > storage = new Dictionary <string, Dictionary <string, double> >();

            foreach (TLSingleLink link in matrix.AllLinks)
            {
                if (!storage.ContainsKey(link.SourceArtifactId))
                {
                    storage.Add(link.SourceArtifactId, new Dictionary <string, double>());
                }
                // Dictionary.Add throws ArgumentException if the same (source, target) pair occurs twice.
                storage[link.SourceArtifactId].Add(link.TargetArtifactId, link.Score);
            }
#if UseDelta
            // compute delta
            double delta = SharedUtils.ComputeDelta(matrix);
#endif
            // iterate over every (source, target) pair
            TLLinksList links = matrix.AllLinks;
            links.Sort();
            foreach (TLSingleLink link in links)
            {
                // get the set of target artifacts related to link.TargetArtifactId
                // then update the value of (link.SourceArtifactId, relatedArtifact) by delta
                // NOTE(review): the dictionary indexer throws KeyNotFoundException when the matrix
                // holds no (source, relatedArtifact) link - confirm callers guarantee that it does.
                foreach (string relatedArtifact in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(link.TargetArtifactId))
                {
#if UseDelta
                    storage[link.SourceArtifactId][relatedArtifact] += storage[link.SourceArtifactId][relatedArtifact] * delta;
#else
                    storage[link.SourceArtifactId][relatedArtifact] += storage[link.SourceArtifactId][relatedArtifact] * 0.1;
#endif
                }
            }
            // build new matrix
            TLLinksList newLinks = new TLLinksList();
            foreach (string source in storage.Keys)
            {
                foreach (string target in storage[source].Keys)
                {
                    newLinks.Add(new TLSingleLink(source, target, storage[source][target]));
                }
            }
            // Sort the adjusted links so they are added to the result in sorted order.
            newLinks.Sort();
            TLSimilarityMatrix newMatrix = new TLSimilarityMatrix();
            foreach (TLSingleLink link in newLinks)
            {
                newMatrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
            }
            return(newMatrix);
        }
Example #14
0
        /// <summary>
        /// Replaces <paramref name="sims"/> with a new matrix that contains only the links whose
        /// source id (parsed as an integer key into <paramref name="qmap"/>) maps to the
        /// feature-set type string of <paramref name="set"/>.
        /// </summary>
        /// <param name="sims">Matrix to filter; reassigned to the filtered matrix</param>
        /// <param name="set">Feature set whose type string selects the links to keep</param>
        /// <param name="qmap">Map from numeric source id to feature-set type string</param>
        private static void RemoveNonFeature(ref TLSimilarityMatrix sims, FeatureSet set, Dictionary<int, string> qmap)
        {
            string             wantedType = GetFeatureSetType(set);
            TLSimilarityMatrix kept       = new TLSimilarityMatrix();

            foreach (TLSingleLink candidate in sims.AllLinks)
            {
                int queryId = Convert.ToInt32(candidate.SourceArtifactId);
                if (qmap[queryId] != wantedType)
                {
                    continue;
                }
                kept.AddLink(candidate.SourceArtifactId, candidate.TargetArtifactId, candidate.Score);
            }

            sims = kept;
        }
Example #15
0
        /// <summary>
        /// Performs an affine transformation on two similarity matrices: both are normalized,
        /// then for every link of the normalized large matrix its score is combined (via Combine)
        /// with the normalized small matrix's score for the same source/target pair.
        /// </summary>
        /// <param name="large">Large expert</param>
        /// <param name="small">Small expert</param>
        /// <param name="lambda">Weight given to large expert</param>
        /// <returns>Transformed similarities</returns>
        public static TLSimilarityMatrix Transform(TLSimilarityMatrix large, TLSimilarityMatrix small, double lambda)
        {
            TLSimilarityMatrix normalizedLarge = Normalize(large);
            TLSimilarityMatrix normalizedSmall = Normalize(small);

            TLSimilarityMatrix result = new TLSimilarityMatrix();
            foreach (TLSingleLink expertLink in normalizedLarge.AllLinks)
            {
                string sourceId = expertLink.SourceArtifactId;
                string targetId = expertLink.TargetArtifactId;

                // Score the small expert assigns to the same pair.
                double smallScore = normalizedSmall.GetScoreForLink(sourceId, targetId);

                result.AddLink(sourceId, targetId, Combine(expertLink.Score, smallScore, lambda));
            }

            return result;
        }
Example #16
0
        /// <summary>
        /// Computes the traceability between source and target artifacts using dictionary and American Corpus Term weights.
        /// Each source artifact's text is run as a query through an ANCSearcher, and every result
        /// is stored as a link (source id, result artifact id, ranking).
        /// </summary>
        /// <param name="sourceArtifacts">The source artifacts.</param>
        /// <param name="targetArtifacts">The target artifacts (only checked for null here).</param>
        /// <param name="dict">The dictionary index that is searched.</param>
        /// <param name="ancTermsWeights">The ANC term weights, passed through PrepareANCData for every query.</param>
        /// <param name="config">The config; supplies the similarity metric.</param>
        /// <returns>Similarity matrix with links between source and target artifacts</returns>
        /// <exception cref="ComponentException">Any of the first four arguments is null.</exception>
        private static TLSimilarityMatrix ComputeTraceability(TLArtifactsCollection sourceArtifacts,
                                                              TLArtifactsCollection targetArtifacts,
                                                              TLDictionaryIndex dict,
                                                              TLKeyValuePairsList ancTermsWeights,
                                                              TracerConfig config)
        {
            // Argument validation, in the same order as the parameter list.
            if (sourceArtifacts == null)
            {
                throw new ComponentException("Received source artifacts are null!");
            }
            if (targetArtifacts == null)
            {
                throw new ComponentException("Received target artifacts are null!");
            }
            if (dict == null)
            {
                throw new ComponentException("Received dictionary index is null!");
            }
            if (ancTermsWeights == null)
            {
                throw new ComponentException("Received 'ancTermsWeights' is null!");
            }

            TLSimilarityMatrix traceMatrix = new TLSimilarityMatrix();
            ANCSearcher        ancSearcher = new ANCSearcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

            foreach (TLArtifact artifact in sourceArtifacts.Values)
            {
                // One search per source artifact; its text is the query.
                List<Result> hits = ancSearcher.search(artifact.Text, dict, PrepareANCData(ancTermsWeights));

                foreach (Result hit in hits)
                {
                    traceMatrix.AddLink(artifact.Id, hit.ArtifactId, hit.Ranking);
                }
            }

            return traceMatrix;
        }
Example #17
0
        /// <summary>
        /// Builds an oracle matrix in which the single source "trace" is linked to every entry
        /// of the map file, scored with the value at the same position in the rank file.
        /// Asserts that both files yield the same number of entries.
        /// </summary>
        /// <param name="rankFile">File of scores, read with Generics.ImportDoubles</param>
        /// <param name="mapFile">File of target ids, read with Generics.ImportStrings</param>
        /// <returns>The oracle matrix</returns>
        internal static TLSimilarityMatrix GenerateOracle(string rankFile, string mapFile)
        {
            Console.WriteLine("Generating oracle...");

            // Copy both sequences into lists once. Count()/ElementAt(i) on a bare IEnumerable
            // can re-enumerate the source on every call, which makes the loop quadratic.
            List<double> ranks = new List<double>(Generics.ImportDoubles(rankFile, false));
            List<string> map   = new List<string>(Generics.ImportStrings(mapFile));

            Assert.AreEqual(map.Count, ranks.Count);
            TLSimilarityMatrix oracle = new TLSimilarityMatrix();

            for (int i = 0; i < map.Count; i++)
            {
                oracle.AddLink("trace", map[i], ranks[i]);
            }
            return oracle;
        }
Example #18
0
        /// <summary>
        /// Import script results: links the trace id to every entry of the mapping file,
        /// scored with the value at the same position in the output file.
        /// </summary>
        /// <param name="result">RScriptResults object (not read by this method)</param>
        /// <returns>Script results as a TLSimilarityMatrix</returns>
        /// <exception cref="RDataException">The two files yield a different number of entries.</exception>
        public override object ImportResults(RScriptResult result)
        {
            // Copy both sequences into lists once. Count()/ElementAt(i) on a bare IEnumerable
            // can re-enumerate the source on every call, which makes the loop quadratic.
            List<double> ranks = new List<double>(Generics.ImportDoubles(_outputFile, false));
            List<string> map   = new List<string>(Generics.ImportStrings(_mappingFile));

            if (ranks.Count != map.Count)
            {
                throw new RDataException("Results file in incorrect format: incorrect number of entries");
            }
            TLSimilarityMatrix rankList = new TLSimilarityMatrix();

            for (int i = 0; i < map.Count; i++)
            {
                rankList.AddLink(_traceID, map[i], ranks[i]);
            }
            return rankList;
        }
Example #19
0
        /// <summary>
        /// FORMAT
        /// ======
        /// Line 1  - "","UC","CC","Similarity","Oracle","Precision","Recall","feedback"
        /// Line 2+ - values
        /// The header line is discarded and blank lines are skipped. Each remaining line is split
        /// on commas and double quotes (empty pieces removed); pieces 1, 2 and 3 become the
        /// source id, the target id and the score.
        /// </summary>
        /// <param name="path">Path of the file to import</param>
        /// <returns>Similarity matrix with its Threshold set to Double.MinValue</returns>
        public static TLSimilarityMatrix Import(string path)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            matrix.Threshold = Double.MinValue;

            // using releases the file on every exit path; the reader was previously never closed.
            using (TextReader file = new StreamReader(path))
            {
                // Discard the header line.
                file.ReadLine();
                string line;

                while ((line = file.ReadLine()) != null)
                {
                    // A blank line (e.g. a trailing newline) has no fields to index.
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    string[] item = line.Split(new char[] { ',', '"' }, StringSplitOptions.RemoveEmptyEntries);
                    matrix.AddLink(item[1], item[2], Convert.ToDouble(item[3]));
                }
            }
            return matrix;
        }
Example #20
0
        /// <summary>
        /// Repeatedly takes the first link of the sorted link list of <paramref name="sims"/>,
        /// and, when <paramref name="feedback"/> reports that link as above threshold, raises
        /// the scores of the remaining links that share its source and whose target is related
        /// (per <paramref name="relationships"/>) to its target. Every link taken is added to
        /// the returned matrix and removed from the list, which is then re-sorted.
        /// The increase is score * delta when UseDelta is defined at compile time, else score * 0.1.
        /// </summary>
        /// <param name="sims">Similarity matrix providing the links (through AllLinks)</param>
        /// <param name="relationships">Matrix queried for target-to-target relatedness</param>
        /// <param name="feedback">Matrix queried to decide whether a taken link counts as correct</param>
        /// <returns>
        /// New matrix holding the links taken before the loop ended. The loop ends when the list
        /// is empty or when the number of confirmed links reaches feedback.Count, so links still
        /// in the list at that point are not part of the result.
        /// </returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix sims, TLSimilarityMatrix relationships, TLSimilarityMatrix feedback)
        {
            // new matrix
            TLSimilarityMatrix newMatrix = new TLSimilarityMatrix();

#if UseDelta
            // compute delta
            double delta = SharedUtils.ComputeDelta(sims);
#endif
            // make sure the entire list is sorted
            // NOTE(review): scores are modified in place below; whether AllLinks hands out
            // copies or the matrix's own link objects is not visible here - confirm.
            TLLinksList links = sims.AllLinks;
            links.Sort();
            // end condition
            int correct = 0;
            // iterate over each source-target pair
            while (links.Count > 0 && correct < feedback.Count)
            {
                // get link at top of list
                TLSingleLink link = links[0];
                // check feedback
                if (feedback.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    correct++;
                    // update related links
                    // (starts at 1: index 0 is the link currently being processed)
                    for (int i = 1; i < links.Count; i++)
                    {
                        if (link.SourceArtifactId.Equals(links[i].SourceArtifactId) &&
                            relationships.IsLinkAboveThreshold(link.TargetArtifactId, links[i].TargetArtifactId))
                        {
#if UseDelta
                            links[i].Score += links[i].Score * delta;
#else
                            links[i].Score += links[i].Score * 0.1;
#endif
                        }
                    }
                }
                // remove link
                newMatrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                links.RemoveAt(0);
                // reorder links
                // (scores may have changed above, so the list is sorted again on every iteration)
                links.Sort();
            }
            return(newMatrix);
        }
Example #21
0
        /// <summary>
        /// Imports every file of a directory as a set of links: the feature id extracted from
        /// the file path is the source, and each line of the file is a target with score 1.
        /// </summary>
        /// <param name="directory">Directory whose files are read</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(string directory)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = Similarities.ExtractFeatureID(file);

                // using closes the reader even if reading or AddLink throws.
                using (StreamReader links = new StreamReader(file))
                {
                    String link;
                    while ((link = links.ReadLine()) != null)
                    {
                        matrix.AddLink(feature, link, 1);
                    }
                }
            }
            return matrix;
        }
Example #22
0
        /// <summary>
        /// Extracts links containing the given artifact IDs from a similarity matrix.
        /// A link is kept when its source id or its target id is one of the given ids.
        /// </summary>
        /// <param name="original">Original matrix</param>
        /// <param name="artifactIDs">List of artifact IDs (enumerated once)</param>
        /// <param name="ignoreParameters">
        /// Flag to ignore parameter overloads and compare only method names: when set, an id
        /// containing '(' after its first character is truncated at that '(' before the lookup.
        /// </param>
        /// <returns>Extracted links, with their original (untruncated) ids</returns>
        public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs, bool ignoreParameters)
        {
            // Build the lookup once so each link costs two hash probes instead of two linear
            // scans (and repeated enumerations) of artifactIDs. A HashSet passed in by the
            // caller is reused as-is so that its own comparer keeps deciding equality.
            HashSet<string> wanted = artifactIDs as HashSet<string> ?? new HashSet<string>(artifactIDs);

            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            foreach (TLSingleLink link in original)
            {
                string sourceID = link.SourceArtifactId;
                string targetID = link.TargetArtifactId;

                if (ignoreParameters)
                {
                    // "> 0" (not ">= 0"): an id that starts with '(' is left untouched.
                    int sourceParen = sourceID.IndexOf('(');
                    if (sourceParen > 0)
                    {
                        sourceID = sourceID.Substring(0, sourceParen);
                    }
                    int targetParen = targetID.IndexOf('(');
                    if (targetParen > 0)
                    {
                        targetID = targetID.Substring(0, targetParen);
                    }
                }

                if (wanted.Contains(sourceID) || wanted.Contains(targetID))
                {
                    matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return matrix.AllLinks;
        }
Example #23
0
 /// <summary>
 /// Distributes the links of <paramref name="original"/> over three matrices according to the
 /// feature-set type that <paramref name="qmap"/> assigns to the link's numeric source id.
 /// Links whose type matches none of Bugs, Features or Patch are not copied anywhere.
 /// </summary>
 /// <param name="original">Matrix whose links are distributed</param>
 /// <param name="qmap">Map from numeric source id to feature-set type string</param>
 /// <param name="bugs">Receives the links typed as FeatureSet.Bugs</param>
 /// <param name="features">Receives the links typed as FeatureSet.Features</param>
 /// <param name="patch">Receives the links typed as FeatureSet.Patch</param>
 public static void Split(ref TLSimilarityMatrix original, Dictionary<int, string> qmap, ref TLSimilarityMatrix bugs, ref TLSimilarityMatrix features, ref TLSimilarityMatrix patch)
 {
     foreach (TLSingleLink entry in original.AllLinks)
     {
         string kind = qmap[Convert.ToInt32(entry.SourceArtifactId)];

         if (kind == Trace.GetFeatureSetType(FeatureSet.Bugs))
         {
             bugs.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
             continue;
         }

         if (kind == Trace.GetFeatureSetType(FeatureSet.Features))
         {
             features.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
             continue;
         }

         if (kind == Trace.GetFeatureSetType(FeatureSet.Patch))
         {
             patch.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
         }
     }
 }
Example #24
0
        /// <summary>
        /// Imports every file of a directory as similarities for one feature. The feature id is
        /// extracted from the file path; each line is split on single spaces, field 0 is a
        /// 1-based index into <paramref name="map"/> (giving the target id) and field 2 is the score.
        /// </summary>
        /// <param name="directory">Directory whose files are read</param>
        /// <param name="map">Target ids, addressed by (field 0 - 1)</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(String directory, List<String> map)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = ExtractFeatureID(file);

                // using closes the reader even when a line fails to parse.
                using (StreamReader idFile = new StreamReader(file))
                {
                    String line;
                    while ((line = idFile.ReadLine()) != null)
                    {
                        String[] vars = line.Split(' ');
                        sims.AddLink(feature, map[Convert.ToInt32(vars[0]) - 1], Convert.ToDouble(vars[2]));
                    }
                }
            }

            return sims;
        }
Example #25
0
        /// <summary>
        /// Imports an oracle from a directory of files.
        /// Each file is a source artifact (its file name is the source id) containing targets
        /// on each line. Blank lines are skipped; every link gets score 1.
        /// </summary>
        /// <param name="directory">Directory whose files are read</param>
        /// <returns>Oracle similarity matrix</returns>
        public static TLSimilarityMatrix ImportDirectory(string directory)
        {
            TLSimilarityMatrix oracle = new TLSimilarityMatrix();

            foreach (string file in Directory.GetFiles(directory))
            {
                string id = Path.GetFileName(file);

                // using closes each file after it is read; the reader was previously never closed.
                using (TextReader fReader = new StreamReader(file))
                {
                    string line;
                    while ((line = fReader.ReadLine()) != null)
                    {
                        if (String.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }
                        oracle.AddLink(id, line, 1);
                    }
                }
            }
            return oracle;
        }
Example #26
0
        /// <summary>
        /// Imports an answer set from file in the form (each line):
        /// SOURCE TARGET1 TARGET2 ...
        /// Fields are separated by whitespace; empty fields are ignored.
        /// </summary>
        /// <param name="filename">File location</param>
        /// <returns>Similarity matrix (link score 1)</returns>
        public static TLSimilarityMatrix Import(String filename)
        {
            TLSimilarityMatrix answer = new TLSimilarityMatrix();

            // using closes the file on every exit path; the reader was previously never closed.
            using (StreamReader file = new StreamReader(filename))
            {
                String line;
                while ((line = file.ReadLine()) != null)
                {
                    String[] artifacts = line.Split();
                    String   source    = artifacts[0];

                    // Index 0 is the source; everything after it is a target.
                    for (int i = 1; i < artifacts.Length; i++)
                    {
                        String target = artifacts[i].Trim();
                        if (target != "")
                        {
                            answer.AddLink(source, target, 1);
                        }
                    }
                }
            }
            return answer;
        }
Example #27
0
        /// <summary>
        /// Computes Jensen-Shannon divergence on two TermDocumentMatrices.
        /// The matrices are first passed through TermDocumentMatrix.Equalize; every
        /// source/target document pair is then scored with DocumentSimilarity. The links of
        /// each source document are sorted with TLLinksList.Sort() before being added.
        /// </summary>
        /// <param name="source">Source term-document matrix</param>
        /// <param name="target">Target term-document matrix</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
        {
            List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(source, target);
            TLSimilarityMatrix       sims     = new TLSimilarityMatrix();

            for (int i = 0; i < matrices[0].NumDocs; i++)
            {
                // The source document and its name do not change across targets:
                // fetch them once per source instead of once per pair.
                var sourceDoc  = matrices[0].GetDocument(i);
                var sourceName = matrices[0].GetDocumentName(i);

                TLLinksList list = new TLLinksList();
                for (int j = 0; j < matrices[1].NumDocs; j++)
                {
                    list.Add(new TLSingleLink(sourceName, matrices[1].GetDocumentName(j),
                                              DocumentSimilarity(sourceDoc, matrices[1].GetDocument(j))));
                }

                list.Sort();
                foreach (TLSingleLink link in list)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return sims;
        }
Example #28
0
        /// <summary>
        /// Imports every file of a directory as a set of links: the feature id extracted from
        /// the file path is the source, and each line of the file is a target with score 1.
        /// </summary>
        /// <param name="directory">Directory whose files are read</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(String directory)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            /* Each file is a link
             * filename.map - feature
             * Each line in file is a methodID
             */
            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = ExtractFeatureID(file);

                // using closes each file after it is read; the reader was previously never closed.
                using (StreamReader links = new StreamReader(file))
                {
                    String link;
                    while ((link = links.ReadLine()) != null)
                    {
                        matrix.AddLink(feature, link, 1);
                    }
                }
            }
            return matrix;
        }
Example #29
0
        /// <summary>
        /// Scores every (query, document) pair as
        /// ComputeProduct(query, document) / (sqrt(query term count) * document length)
        /// and stores the result as a link from the query key to the document key.
        /// A pair whose denominator is zero is scored 0.0 instead of NaN or infinity.
        /// </summary>
        /// <param name="docs">Document vectors, keyed by document id</param>
        /// <param name="lengths">Per-document length, looked up by document id</param>
        /// <param name="queries">Query vectors, keyed by query id</param>
        /// <returns>Similarity matrix with one link per (query, document) pair</returns>
        public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (KeyValuePair<string, DocumentVector> QueryKVP in queries)
            {
                /*
                 * Since tf in queries are all 1,
                 * we can assume this term is the sqrt of the size of the dictionary
                 */
                double qVal = Math.Sqrt(QueryKVP.Value.Count);
                foreach (KeyValuePair<string, NormalizedVector> DocKVP in docs)
                {
                    double dVal        = lengths[DocKVP.Key];
                    double qdVec       = ComputeProduct(QueryKVP.Value, DocKVP.Value);
                    double denominator = qVal * dVal;

                    // An empty query or a zero-length document would otherwise divide by zero.
                    double score = (denominator == 0.0) ? 0.0 : qdVec / denominator;
                    sims.AddLink(QueryKVP.Key, DocKVP.Key, score);
                }
            }

            return sims;
        }
Example #30
0
        /// <summary>
        /// Computes cosine similarities between a set of boolean document vectors and a tfidf weighted corpus.
        /// Both matrices are first passed through TermDocumentMatrix.Equalize. A pair whose norm
        /// product is zero is scored 0.0. The links of each source document are sorted with
        /// TLLinksList.Sort() before being added to the result.
        /// </summary>
        /// <param name="ids">Boolean document vectors</param>
        /// <param name="tfidf">tf-idf weighted document vectors</param>
        /// <returns>Similarity matrix</returns>
        private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
        {
            TLSimilarityMatrix       sims     = new TLSimilarityMatrix();
            List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(ids, tfidf);

            for (int i = 0; i < ids.NumDocs; i++)
            {
                // The source row's norm is the same for every target document, so it is
                // accumulated once per source (same term order as the inner loop) instead of once per pair.
                double asquared = 0.0;
                for (int k = 0; k < matrices[0].NumTerms; k++)
                {
                    asquared += Math.Pow(matrices[0][i, k], 2);
                }
                double sourceNorm = Math.Sqrt(asquared);
                var    sourceName = ids.GetDocumentName(i);

                TLLinksList links = new TLLinksList();
                for (int j = 0; j < tfidf.NumDocs; j++)
                {
                    double product  = 0.0;
                    double bsquared = 0.0;
                    for (int k = 0; k < matrices[0].NumTerms; k++)
                    {
                        double a = matrices[0][i, k];
                        double b = matrices[1][j, k];
                        product  += (a * b);
                        bsquared += Math.Pow(b, 2);
                    }

                    double cross = sourceNorm * Math.Sqrt(bsquared);

                    // Avoid dividing by zero: such pairs are scored 0.0.
                    double score = (cross == 0.0) ? 0.0 : product / cross;
                    links.Add(new TLSingleLink(sourceName, tfidf.GetDocumentName(j), score));
                }

                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return sims;
        }