public void SimilarityMatrixRawSerializationTest()
        {
            // Round-trips a similarity matrix through WriteData/ReadData over an in-memory
            // stream and checks that the link count and the source artifact ids survive.
            string[] sources = new string[] { "source1", "source2", "source3", "source4", "source5", "source6", "source7", "source8", "source9", "source10" };
            string[] targets = new string[] { "target1", "target2", "target3", "target4", "target5", "target6", "target7", "target8", "target9", "target10" };

            TLSimilarityMatrix matrixIn = new TLSimilarityMatrix();
            for (int i = 0; i < sources.Length; i++)
            {
                matrixIn.AddLink(sources[i], targets[i], (double)i);
            }

            TLSimilarityMatrix matrixOut = new TLSimilarityMatrix();

            // Writer and reader share one stream; the using blocks guarantee that all three
            // disposables are released even when WriteData/ReadData throws.
            using (MemoryStream stream = new MemoryStream())
            using (BinaryWriter binWriter = new BinaryWriter(stream))
            using (BinaryReader binReader = new BinaryReader(stream))
            {
                matrixIn.WriteData(binWriter);

                // Make sure everything has reached the stream before rewinding it for the read.
                binWriter.Flush();
                stream.Position = 0;

                matrixOut.ReadData(binReader);
            }

            Assert.AreEqual(matrixIn.Count, matrixOut.Count);

            StringHashSet setIn = matrixIn.SourceArtifactsIds;
            StringHashSet setOut = matrixOut.SourceArtifactsIds;

            foreach (string artifact in setIn)
            {
                Assert.IsTrue(setOut.Contains(artifact));
            }
        }
Exemple #2
0
 /// <summary>
 /// Computes cosine similarities between two TermDocumentMatrices.
 /// Cosine similarity is defined as (dot product) / (length * length).
 /// A pair whose length product is exactly zero is given a score of 0.0
 /// instead of dividing by zero.
 /// </summary>
 /// <param name="m1">Binary document matrix</param>
 /// <param name="m2">tf-idf weighted document matrix</param>
 /// <returns>Similarity matrix; the links of each m1 document are added in the order produced by TLLinksList.Sort()</returns>
 public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
 {
     TLSimilarityMatrix sims = new TLSimilarityMatrix();
     // The vectors are read from the equalized matrices; names and counts come from the inputs.
     List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);
     TermDocumentMatrix sourceMatrix = matrices[0];
     TermDocumentMatrix targetMatrix = matrices[1];

     // A target's length does not depend on the source document, so compute each one once
     // instead of once per (source, target) pair.
     double[] targetLengths = new double[m2.NumDocs];
     for (int j = 0; j < targetLengths.Length; j++)
     {
         targetLengths[j] = ComputeLength(targetMatrix.GetDocument(j));
     }

     for (int i = 0; i < m1.NumDocs; i++)
     {
         // Likewise the source length is invariant across the inner loop.
         double sourceLength = ComputeLength(sourceMatrix.GetDocument(i));
         TLLinksList links = new TLLinksList();
         for (int j = 0; j < m2.NumDocs; j++)
         {
             double lengthProduct = sourceLength * targetLengths[j];
             if (lengthProduct == 0.0)
             {
                 links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), 0.0));
             }
             else
             {
                 links.Add(new TLSingleLink(m1.GetDocumentName(i), m2.GetDocumentName(j), ComputeDotProduct(sourceMatrix.GetDocument(i), targetMatrix.GetDocument(j)) / lengthProduct));
             }
         }
         links.Sort();
         foreach (TLSingleLink link in links)
         {
             sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         }
     }
     return sims;
 }
 /// <summary>
 /// Imports a file in the form (each line):
 /// SOURCE TARGET SCORE
 /// Fields are separated by whitespace; blank lines are skipped.
 /// </summary>
 /// <param name="filename">Similarities file</param>
 /// <returns>Similarity matrix</returns>
 /// <exception cref="InvalidDataException">A non-blank line has fewer than three fields.</exception>
 public static TLSimilarityMatrix Import(String filename)
 {
     // The using block closes the file on every exit path, including exceptions that
     // are not caught below (for example a FormatException from Convert.ToDouble).
     using (StreamReader file = new StreamReader(filename))
     {
         TLSimilarityMatrix answer = new TLSimilarityMatrix();
         String line;
         int num = 0; // 1-based line number, used only for the error message
         while ((line = file.ReadLine()) != null)
         {
             num++;
             if (String.IsNullOrWhiteSpace(line))
             {
                 continue;
             }
             try
             {
                 String[] artifacts = line.Split();
                 String source = artifacts[0];
                 String target = artifacts[1];
                 double score = Convert.ToDouble(artifacts[2]);
                 answer.AddLink(source, target, score);
             }
             catch (IndexOutOfRangeException e)
             {
                 throw new InvalidDataException("Invalid data format on line " + num + " of file:" + filename, e);
             }
         }
         return answer;
     }
 }
Exemple #4
0
 /// <summary>
 /// Builds a similarity matrix containing every link of the given list.
 /// </summary>
 /// <param name="list">Links to copy</param>
 /// <returns>New similarity matrix holding the copied links</returns>
 public static TLSimilarityMatrix CreateMatrix(TLLinksList list)
 {
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     for (int index = 0; index < list.Count; index++)
     {
         TLSingleLink entry = list[index];
         result.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
     }
     return result;
 }
 /// <summary>
 /// Normalizes a similarity matrix by replacing every score with
 /// (score - mean) / standard deviation, where mean and standard deviation are
 /// taken over the whole matrix.
 /// </summary>
 /// <param name="matrix">Similarity matrix</param>
 /// <returns>
 /// Normalized similarity matrix. When the standard deviation is exactly zero
 /// every normalized score is 0.0.
 /// </returns>
 public static TLSimilarityMatrix Normalize(TLSimilarityMatrix matrix)
 {
     TLSimilarityMatrix norm = new TLSimilarityMatrix();
     double mean = TLSimilarityMatrixUtil.AverageSimilarity(matrix);
     double stdDev = TLSimilarityMatrixUtil.SimilarityStandardDeviation(matrix);
     // With zero spread the division below would produce NaN (0/0) or +/-Infinity;
     // map that degenerate case to a neutral score instead.
     bool noSpread = stdDev == 0.0;
     foreach (TLSingleLink link in matrix.AllLinks)
     {
         double normalized = noSpread ? 0.0 : (link.Score - mean) / stdDev;
         norm.AddLink(link.SourceArtifactId, link.TargetArtifactId, normalized);
     }
     return norm;
 }
 /// <summary>
 /// Replaces <paramref name="sims"/> with a new matrix holding only the links whose
 /// source artifact maps, through <paramref name="qmap"/>, to the type of the given feature set.
 /// </summary>
 /// <param name="sims">Matrix to filter; reassigned to the filtered matrix</param>
 /// <param name="set">Feature set whose type string links must match</param>
 /// <param name="qmap">Lookup from numeric source artifact id to feature type string</param>
 private static void RemoveNonFeature(ref TLSimilarityMatrix sims, FeatureSet set, Dictionary<int, string> qmap)
 {
     string wantedType = GetFeatureSetType(set);
     TLSimilarityMatrix kept = new TLSimilarityMatrix();
     foreach (TLSingleLink candidate in sims.AllLinks)
     {
         // Source ids are numeric strings used as keys into qmap.
         int queryId = Convert.ToInt32(candidate.SourceArtifactId);
         if (qmap[queryId] != wantedType)
         {
             continue;
         }
         kept.AddLink(candidate.SourceArtifactId, candidate.TargetArtifactId, candidate.Score);
     }
     sims = kept;
 }
 /// <summary>
 /// Distributes the links of <paramref name="original"/> over three matrices according to
 /// the feature type that <paramref name="qmap"/> assigns to each link's source artifact.
 /// Links whose type matches none of Bugs, Features or Patch are not copied anywhere.
 /// </summary>
 /// <param name="original">Matrix whose links are distributed</param>
 /// <param name="qmap">Lookup from numeric source artifact id to feature type string</param>
 /// <param name="bugs">Receives links of the Bugs type</param>
 /// <param name="features">Receives links of the Features type</param>
 /// <param name="patch">Receives links of the Patch type</param>
 public static void Split(ref TLSimilarityMatrix original, Dictionary<int, string> qmap, ref TLSimilarityMatrix bugs, ref TLSimilarityMatrix features, ref TLSimilarityMatrix patch)
 {
     foreach (TLSingleLink entry in original.AllLinks)
     {
         int queryId = Convert.ToInt32(entry.SourceArtifactId);
         string kind = qmap[queryId];

         if (kind == Trace.GetFeatureSetType(FeatureSet.Bugs))
         {
             bugs.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
         }
         else if (kind == Trace.GetFeatureSetType(FeatureSet.Features))
         {
             features.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
         }
         else if (kind == Trace.GetFeatureSetType(FeatureSet.Patch))
         {
             patch.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
         }
     }
 }
 /// <summary>
 /// Builds an oracle matrix that links the fixed source id "trace" to every entry of the
 /// map file, scored with the value at the same position in the rank file.
 /// </summary>
 /// <param name="rankFile">File of scores, read with Generics.ImportDoubles</param>
 /// <param name="mapFile">File of target ids, read with Generics.ImportStrings</param>
 /// <returns>Oracle similarity matrix</returns>
 internal static TLSimilarityMatrix GenerateOracle(string rankFile, string mapFile)
 {
     Console.WriteLine("Generating oracle...");
     IEnumerable<double> rankSource = Generics.ImportDoubles(rankFile, false);
     IEnumerable<string> mapSource = Generics.ImportStrings(mapFile);

     // Materialise both sequences once: calling Count()/ElementAt(i) on a plain
     // IEnumerable re-enumerates it on every call, which is quadratic inside the loop.
     string[] map = mapSource.ToArray();
     double[] ranks = rankSource.ToArray();

     Assert.AreEqual(map.Length, ranks.Length);
     TLSimilarityMatrix oracle = new TLSimilarityMatrix();
     for (int i = 0; i < map.Length; i++)
     {
         oracle.AddLink("trace", map[i], ranks[i]);
     }
     return oracle;
 }
        /// <summary>
        /// Normalizes both similarity matrices and merges them link by link with Combine.
        /// Only links present in the large expert are emitted; the small expert's score
        /// for the same source/target pair is looked up with GetScoreForLink.
        /// </summary>
        /// <param name="large">Large expert</param>
        /// <param name="small">Small expert</param>
        /// <param name="lambda">Weight given to large expert</param>
        /// <returns>Transformed similarities</returns>
        public static TLSimilarityMatrix Transform(TLSimilarityMatrix large, TLSimilarityMatrix small, double lambda)
        {
            TLSimilarityMatrix normalizedLarge = Normalize(large);
            TLSimilarityMatrix normalizedSmall = Normalize(small);
            TLSimilarityMatrix result = new TLSimilarityMatrix();

            foreach (TLSingleLink entry in normalizedLarge.AllLinks)
            {
                string sourceId = entry.SourceArtifactId;
                string targetId = entry.TargetArtifactId;

                double smallScore = normalizedSmall.GetScoreForLink(sourceId, targetId);
                double blended = Combine(entry.Score, smallScore, lambda);
                result.AddLink(sourceId, targetId, blended);
            }

            return result;
        }
 /// <summary>
 /// Imports a similarity matrix from a CSV file.
 /// FORMAT
 /// ======
 /// Line 1  - "","UC","CC","Similarity","Oracle","Precision","Recall","feedback"
 /// Line 2+ - values
 /// The header line is discarded. Each remaining line is split on commas and double
 /// quotes (empty fields dropped); fields 1, 2 and 3 are used as source id, target id
 /// and score. Blank lines are skipped.
 /// </summary>
 /// <param name="path">Path of the CSV file</param>
 /// <returns>Similarity matrix with its threshold set to Double.MinValue</returns>
 public static TLSimilarityMatrix Import(string path)
 {
     TLSimilarityMatrix matrix = new TLSimilarityMatrix();
     matrix.Threshold = Double.MinValue;
     char[] separators = new char[] { ',', '"' };
     // The using block releases the file handle on every exit path.
     using (TextReader file = new StreamReader(path))
     {
         file.ReadLine(); // skip the header row
         string line;
         while ((line = file.ReadLine()) != null)
         {
             // A blank (e.g. trailing) line has no fields and would otherwise fail on item[1].
             if (line.Trim().Length == 0)
             {
                 continue;
             }
             string[] item = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
             // item[0] is presumably the row label written in the unnamed first column - TODO confirm.
             matrix.AddLink(item[1], item[2], Convert.ToDouble(item[3]));
         }
     }
     return matrix;
 }
        /// <summary>
        /// Boosts the scores of links whose target is related to the target of another link
        /// from the same source, then returns the result as a new similarity matrix.
        /// For every link (source, target) in <paramref name="matrix"/> and every artifact that
        /// <paramref name="relationships"/> reports as related to that target, the stored score of
        /// (source, related artifact) is increased by a fraction of itself: delta when UseDelta
        /// is defined, otherwise 0.1.
        /// </summary>
        /// <param name="matrix">Similarity matrix to process</param>
        /// <param name="relationships">Matrix queried, by target artifact id, for related artifact ids above its threshold</param>
        /// <returns>New similarity matrix built from the updated scores</returns>
        public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
        {
            // create pseudo matrix for easy lookup
            // Dictionary<sourceID, Dictionary<targetID, score>>
            Dictionary<string, Dictionary<string, double>> storage = new Dictionary<string, Dictionary<string, double>>();
            foreach (TLSingleLink link in matrix.AllLinks)
            {
                if (!storage.ContainsKey(link.SourceArtifactId))
                {
                    storage.Add(link.SourceArtifactId, new Dictionary<string, double>());
                }
                // Dictionary.Add throws ArgumentException if the same (source, target) pair occurs twice.
                storage[link.SourceArtifactId].Add(link.TargetArtifactId, link.Score);
            }
#if UseDelta
            // compute delta
            double delta = SharedUtils.ComputeDelta(matrix);
#endif
            // iterate over every (source, target) pair
            TLLinksList links = matrix.AllLinks;
            links.Sort();
            foreach (TLSingleLink link in links)
            {
                // get the set of target artifacts related to link.TargetArtifactId
                // then update the value of (link.SourceArtifactId, relatedArtifact) by delta
                foreach (string relatedArtifact in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(link.TargetArtifactId))
                {
                    // NOTE(review): the indexer read throws KeyNotFoundException when the matrix has no
                    // (source, relatedArtifact) link - confirm every related artifact is always present.
                    // The update is cumulative: a score is boosted once per link that relates to it.
#if UseDelta
                    storage[link.SourceArtifactId][relatedArtifact] += storage[link.SourceArtifactId][relatedArtifact] * delta;
#else
                    storage[link.SourceArtifactId][relatedArtifact] += storage[link.SourceArtifactId][relatedArtifact] * 0.1;
#endif
                }
            }
            // build new matrix
            TLLinksList newLinks = new TLLinksList();
            foreach (string source in storage.Keys)
            {
                foreach (string target in storage[source].Keys)
                {
                    newLinks.Add(new TLSingleLink(source, target, storage[source][target]));
                }
            }
            // Sort before inserting so links are added to the result in TLLinksList.Sort() order.
            newLinks.Sort();
            TLSimilarityMatrix newMatrix = new TLSimilarityMatrix();
            foreach (TLSingleLink link in newLinks)
            {
                newMatrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
            }
            return newMatrix;
        }
Exemple #12
0
        /// <summary>
        /// Imports every file of a directory as a set of links. The feature id is derived from
        /// the file path with Similarities.ExtractFeatureID, and each line of the file becomes
        /// the target of a link from that feature with score 1.
        /// </summary>
        /// <param name="directory">Directory whose files are imported</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(string directory)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = Similarities.ExtractFeatureID(file);

                // The using block closes the reader even when ReadLine or AddLink throws.
                using (StreamReader links = new StreamReader(file))
                {
                    String link;
                    while ((link = links.ReadLine()) != null)
                    {
                        matrix.AddLink(feature, link, 1);
                    }
                }
            }
            return matrix;
        }
Exemple #13
0
 /// <summary>
 /// Processes the links of <paramref name="sims"/> one at a time from the front of the sorted
 /// list. When the front link is confirmed by <paramref name="feedback"/>, every remaining link
 /// that has the same source and whose target is related (per <paramref name="relationships"/>)
 /// to the confirmed target gets its score increased by a fraction of itself: delta when
 /// UseDelta is defined, otherwise 0.1. Each processed link is moved into the result matrix and
 /// the remaining list is sorted again.
 /// </summary>
 /// <param name="sims">Similarity matrix to process</param>
 /// <param name="relationships">Matrix queried for target-to-target relations above its threshold</param>
 /// <param name="feedback">Matrix queried to decide whether a link is confirmed; its Count bounds the loop</param>
 /// <returns>New similarity matrix containing the links processed before the loop ended</returns>
 public static TLSimilarityMatrix Compute(TLSimilarityMatrix sims, TLSimilarityMatrix relationships, TLSimilarityMatrix feedback)
 {
     // new matrix
     TLSimilarityMatrix newMatrix = new TLSimilarityMatrix();
     #if UseDelta
     // compute delta
     double delta = SharedUtils.ComputeDelta(sims);
     #endif
     // make sure the entire list is sorted
     TLLinksList links = sims.AllLinks;
     links.Sort();
     // end condition
     int correct = 0;
     // iterate over each source-target pair
     // NOTE(review): the loop stops as soon as 'correct' reaches feedback.Count, so any links
     // still in the list at that point are never added to newMatrix - confirm this is intended.
     while (links.Count > 0 && correct < feedback.Count)
     {
         // get link at top of list
         TLSingleLink link = links[0];
         // check feedback
         if (feedback.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
         {
             correct++;
             // update related links
             // Starts at 1 because index 0 is the confirmed link itself.
             for (int i = 1; i < links.Count; i++)
             {
                 if (link.SourceArtifactId.Equals(links[i].SourceArtifactId)
                     && relationships.IsLinkAboveThreshold(link.TargetArtifactId, links[i].TargetArtifactId))
                 {
     #if UseDelta
                     links[i].Score += links[i].Score * delta;
     #else
                     links[i].Score += links[i].Score * 0.1;
     #endif
                 }
             }
         }
         // remove link
         newMatrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         links.RemoveAt(0);
         // reorder links
         // The whole remaining list is re-sorted on every iteration, boosted or not.
         links.Sort();
     }
     return newMatrix;
 }
Exemple #14
0
        /// <summary>
        /// Imports every file of a directory as scored links. The feature id is derived from the
        /// file path with ExtractFeatureID. Each line is split on single spaces: field 0 is a
        /// 1-based index into <paramref name="map"/> that selects the target id, and field 2 is
        /// the score.
        /// </summary>
        /// <param name="directory">Directory whose files are imported</param>
        /// <param name="map">Target ids, addressed by the 1-based index found in each line</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(String directory, List<String> map)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = ExtractFeatureID(file);

                // The using block closes the reader even when a line fails to parse
                // or its index falls outside the map.
                using (StreamReader idFile = new StreamReader(file))
                {
                    String line;
                    while ((line = idFile.ReadLine()) != null)
                    {
                        String[] vars = line.Split(' ');
                        sims.AddLink(feature, map[Convert.ToInt32(vars[0]) - 1], Convert.ToDouble(vars[2]));
                    }
                }
            }

            return sims;
        }
        /// <summary>
        /// Imports every file of a directory as a set of links. The feature id is derived from
        /// the file path with ExtractFeatureID, and each line of the file becomes the target of
        /// a link from that feature with score 1.
        /// </summary>
        /// <param name="directory">Directory whose files are imported</param>
        /// <returns>Similarity matrix holding one link per line read</returns>
        public static TLSimilarityMatrix Import(String directory)
        {
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();
            /* Each file is a link
             * filename.map - feature
             * Each line in file is a methodID
             */
            foreach (String file in Directory.GetFiles(directory))
            {
                String feature = ExtractFeatureID(file);

                // The reader was previously never closed, leaving one open file handle per
                // imported file; the using block releases it on every exit path.
                using (StreamReader links = new StreamReader(file))
                {
                    String link;
                    while ((link = links.ReadLine()) != null)
                    {
                        matrix.AddLink(feature, link, 1);
                    }
                }
            }
            return matrix;
        }
Exemple #16
0
 /// <summary>
 /// Scores every source document against every target document with DocumentSimilarity
 /// (Jensen-Shannon divergence) after equalizing the two TermDocumentMatrices.
 /// The links of each source document are sorted before being added to the result.
 /// </summary>
 /// <param name="source">Source artifacts collection</param>
 /// <param name="target">Target artifacts collection</param>
 /// <returns>Similarity matrix</returns>
 public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
 {
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
     TermDocumentMatrix left = equalized[0];
     TermDocumentMatrix right = equalized[1];
     TLSimilarityMatrix result = new TLSimilarityMatrix();

     for (int row = 0; row < left.NumDocs; row++)
     {
         TLLinksList rowLinks = new TLLinksList();
         for (int col = 0; col < right.NumDocs; col++)
         {
             TLSingleLink scored = new TLSingleLink(
                 left.GetDocumentName(row),
                 right.GetDocumentName(col),
                 DocumentSimilarity(left.GetDocument(row), right.GetDocument(col)));
             rowLinks.Add(scored);
         }

         rowLinks.Sort();
         foreach (TLSingleLink entry in rowLinks)
         {
             result.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
         }
     }

     return result;
 }
Exemple #17
0
 /// <summary>
 /// Imports an answer set from file in the form (each line):
 /// SOURCE TARGET1 TARGET2 ...
 /// Fields are separated by whitespace; empty fields are ignored.
 /// </summary>
 /// <param name="filename">File location</param>
 /// <returns>Similarity matrix (link score 1)</returns>
 public static TLSimilarityMatrix Import(String filename)
 {
     // The reader was previously never closed; the using block releases the file
     // handle on every exit path.
     using (StreamReader file = new StreamReader(filename))
     {
         TLSimilarityMatrix answer = new TLSimilarityMatrix();
         String line;
         while ((line = file.ReadLine()) != null)
         {
             String[] artifacts = line.Split();
             String source = artifacts[0];
             for (int i = 1; i < artifacts.Length; i++)
             {
                 // Consecutive separators produce empty entries, which are not targets.
                 String target = artifacts[i].Trim();
                 if (target != "")
                 {
                     answer.AddLink(source, target, 1);
                 }
             }
         }
         return answer;
     }
 }
        /// <summary>
        /// Scores every query against every document as
        /// ComputeProduct(query, document) / (query norm * document length).
        /// </summary>
        /// <param name="docs">Document vectors keyed by document id</param>
        /// <param name="lengths">Per-document length, looked up by document id</param>
        /// <param name="queries">Query vectors keyed by query id</param>
        /// <returns>Similarity matrix with one link per (query, document) pair</returns>
        public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
        {
            TLSimilarityMatrix result = new TLSimilarityMatrix();

            foreach (KeyValuePair<string, DocumentVector> query in queries)
            {
                // Every query term is assumed to have tf = 1, so the query norm
                // reduces to the square root of the number of terms in the query.
                double queryNorm = Math.Sqrt(query.Value.Count);

                foreach (KeyValuePair<string, NormalizedVector> doc in docs)
                {
                    double docNorm = lengths[doc.Key];
                    double dot = ComputeProduct(query.Value, doc.Value);
                    double denominator = queryNorm * docNorm;
                    result.AddLink(query.Key, doc.Key, dot / denominator);
                }
            }

            return result;
        }
        /// <summary>
        /// Computes the traceability between source and target artifacts using a dictionary index
        /// and American National Corpus term weights. The text of each source artifact is run as a
        /// query through an ANCSearcher, and every result becomes a link scored with its ranking.
        /// </summary>
        /// <param name="sourceArtifacts">The source artifacts; each one is used as a query.</param>
        /// <param name="targetArtifacts">The target artifacts; only checked for null here.</param>
        /// <param name="dict">The dictionary index that is searched.</param>
        /// <param name="ancTermsWeights">The ANC term weights, passed through PrepareANCData for every query.</param>
        /// <param name="config">The config supplying the similarity metric.</param>
        /// <returns>Similarity matrix with links between source and target artifacts</returns>
        private static TLSimilarityMatrix ComputeTraceability(TLArtifactsCollection sourceArtifacts, 
                                                              TLArtifactsCollection targetArtifacts, 
                                                              TLDictionaryIndex dict, 
                                                              TLKeyValuePairsList ancTermsWeights, 
                                                              TracerConfig config)
        {
            // Reject missing inputs up front, in the same order as the parameters.
            if (sourceArtifacts == null)
            {
                throw new ComponentException("Received source artifacts are null!");
            }
            if (targetArtifacts == null)
            {
                throw new ComponentException("Received target artifacts are null!");
            }
            if (dict == null)
            {
                throw new ComponentException("Received dictionary index is null!");
            }
            if (ancTermsWeights == null)
            {
                throw new ComponentException("Received 'ancTermsWeights' is null!");
            }

            TLSimilarityMatrix links = new TLSimilarityMatrix();
            ANCSearcher ancSearcher = new ANCSearcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

            // One search per source artifact: its text is the query.
            foreach (TLArtifact artifact in sourceArtifacts.Values)
            {
                String queryText = artifact.Text;
                List<Result> hits = ancSearcher.search(queryText, dict, PrepareANCData(ancTermsWeights));

                // Record every hit as a link from this source artifact.
                foreach (Result hit in hits)
                {
                    links.AddLink(artifact.Id, hit.ArtifactId, hit.Ranking);
                }
            }

            return links;
        }
 /// <summary>
 /// Collapses overloaded target artifacts, assigning the best score.
 /// A target id is cut at its first '(' (when that is not the first character), so all
 /// overloads of the same name map to one target; for each (source, collapsed target)
 /// pair the highest score seen is kept.
 /// </summary>
 /// <param name="matrix">Similarities</param>
 /// <returns>Collapsed artifacts</returns>
 public static TLSimilarityMatrix CollapseOverloadedTargets(TLSimilarityMatrix matrix)
 {
     // source id -> (collapsed target id -> best score)
     Dictionary<string, Dictionary<string, double>> pseudomatrix = new Dictionary<string, Dictionary<string, double>>();
     foreach (TLSingleLink link in matrix.AllLinks)
     {
         Dictionary<string, double> targets;
         if (!pseudomatrix.TryGetValue(link.SourceArtifactId, out targets))
         {
             targets = new Dictionary<string, double>();
             pseudomatrix.Add(link.SourceArtifactId, targets);
         }

         // Drop the parameter list, e.g. "Foo.Bar(int)" -> "Foo.Bar".
         int startIndex = link.TargetArtifactId.IndexOf('(');
         string target = (startIndex > 0)
             ? link.TargetArtifactId.Substring(0, startIndex)
             : link.TargetArtifactId;

         // Single lookup: store the score when the target is new or this score beats the best so far.
         double best;
         if (!targets.TryGetValue(target, out best) || link.Score > best)
         {
             targets[target] = link.Score;
         }
     }

     TLSimilarityMatrix collapsedMatrix = new TLSimilarityMatrix();
     foreach (KeyValuePair<string, Dictionary<string, double>> sourceEntry in pseudomatrix)
     {
         foreach (KeyValuePair<string, double> targetEntry in sourceEntry.Value)
         {
             collapsedMatrix.AddLink(sourceEntry.Key, targetEntry.Key, targetEntry.Value);
         }
     }
     return collapsedMatrix;
 }
 /// <summary>
 /// Extracts links containing the given artifact IDs from a similarity matrix.
 /// A link is kept when its source id or its target id is among the given IDs. The kept
 /// links are collected in a TLSimilarityMatrix and returned as its AllLinks list, always
 /// with their original (unstripped) ids.
 /// </summary>
 /// <param name="original">Original matrix</param>
 /// <param name="artifactIDs">List of artifact IDs. It is enumerated at most once, up front.</param>
 /// <param name="ignoreParameters">Flag to ignore parameter overloads and compare only method names.</param>
 /// <returns>Extracted links</returns>
 /// <exception cref="ArgumentNullException"><paramref name="artifactIDs"/> is null.</exception>
 public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs, bool ignoreParameters)
 {
     // Enumerable.Contains already delegates to ICollection<T>.Contains, so an existing
     // collection is reused unchanged (keeping its own equality rules). Any other sequence
     // is copied into a set once instead of being re-enumerated twice per link.
     ICollection<string> lookup = (artifactIDs as ICollection<string>) ?? new HashSet<string>(artifactIDs);

     TLSimilarityMatrix matrix = new TLSimilarityMatrix();
     foreach (TLSingleLink link in original)
     {
         string sourceID = link.SourceArtifactId;
         string targetID = link.TargetArtifactId;
         if (ignoreParameters)
         {
             // Compare on the name only: cut each id at its first '(' unless it is the first character.
             int sourceParen = sourceID.IndexOf('(');
             if (sourceParen > 0)
             {
                 sourceID = sourceID.Substring(0, sourceParen);
             }
             int targetParen = targetID.IndexOf('(');
             if (targetParen > 0)
             {
                 targetID = targetID.Substring(0, targetParen);
             }
         }

         if (lookup.Contains(sourceID) || lookup.Contains(targetID))
         {
             matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
         }
     }
     return matrix.AllLinks;
 }
Exemple #22
0
 /// <summary>
 /// Import script results. Reads the whole output file, drops its first two characters and
 /// every ')', splits the rest on commas into scores, and pairs the i-th score with the i-th
 /// line of the edges file. Each edge line holds two whitespace-separated integer indexes
 /// into the corpus map, giving the source and target ids of the link.
 /// </summary>
 /// <param name="result">RScriptResults object</param>
 /// <returns>Script results</returns>
 /// <exception cref="RDataException">The number of scores differs from the number of edges.</exception>
 public override object ImportResults(RScriptResult result)
 {
     string rawdata;
     // The using block disposes the reader even when ReadToEnd throws.
     using (TextReader rfile = new StreamReader(_outputFile))
     {
         rawdata = rfile.ReadToEnd();
     }

     TLSimilarityMatrix matrix = new TLSimilarityMatrix();
     // Presumably the file is a single R vector literal such as "c(0.1,0.2)" - TODO confirm.
     string[] sims = rawdata.Remove(0, 2).Replace(")", String.Empty).Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
     string[] edges = Generics.ImportStrings(_info.Edges);
     if (sims.Length != edges.Length)
     {
         throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + edges.Length);
     }
     for (int i = 0; i < sims.Length; i++)
     {
         string[] split = edges[i].Split();
         matrix.AddLink(_corpus.Map[Convert.ToInt32(split[0])], _corpus.Map[Convert.ToInt32(split[1])], Convert.ToDouble(sims[i]));
     }
     return matrix;
 }
        /// <summary>
        /// Imports the answer set without validation against source and target artifacts.
        /// Every /answer_set/links/link element becomes one link; a link without a
        /// confidence_score element gets the default score 1.0.
        /// </summary>
        /// <param name="filepath">The filepath of the answer set XML file.</param>
        /// <param name="logger">The logger (not used by this overload).</param>
        /// <param name="trimValues">if set to <c>true</c>, ids and the confidence score are trimmed before use.</param>
        /// <returns>Similarity matrix holding the imported links</returns>
        /// <exception cref="XmlException">
        /// A link lacks source_artifact_id or target_artifact_id, or its confidence score cannot be parsed.
        /// </exception>
        public static TLSimilarityMatrix ImportAnswerSet(string filepath, ComponentLogger logger, bool trimValues)
        {
            TLSimilarityMatrix answerSet = new TLSimilarityMatrix();

            XPathDocument doc = new XPathDocument(filepath);
            XPathNavigator nav = doc.CreateNavigator();

            //read collection info
            // The two ids are not used by this overload; reading .Value only requires that both
            // elements exist (a missing element surfaces as a NullReferenceException).
            XPathNavigator iter = nav.SelectSingleNode("/answer_set/answer_info/source_artifacts_collection");
            string source_artifacts_collection_id = iter.Value;
            
            iter = nav.SelectSingleNode("/answer_set/answer_info/target_artifacts_collection");
            string target_artifacts_collection_id = iter.Value;
            
            XPathNodeIterator linksIterator = nav.Select("/answer_set/links/link");
            
            string source_artifact_id;
            string target_artifact_id;
            double confidence_score;
            while (linksIterator.MoveNext())
            {
                // Parse Source Artifact Id
                iter = linksIterator.Current.SelectSingleNode("source_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The source_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                source_artifact_id = iter.Value;
                if (trimValues)
                {
                    source_artifact_id = source_artifact_id.Trim();
                }
                            
                // Parse Target Artifact Id
                iter = linksIterator.Current.SelectSingleNode("target_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The target_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                target_artifact_id = iter.Value;
                if (trimValues)
                {
                    target_artifact_id = target_artifact_id.Trim();
                }
                
                //Parse confidence score
                iter = linksIterator.Current.SelectSingleNode("confidence_score");
                if (iter == null)
                {
                    //if confidence score is not provided set it to default value 1
                    confidence_score = 1.0;
                }
                else
                {
                    string tmpValue = iter.Value;
                    if (trimValues)
                    {
                        tmpValue = tmpValue.Trim();
                    }

                    if (double.TryParse(tmpValue, out confidence_score) == false)
                    {
                        // Report both ends of the link and the value that failed to parse.
                        throw new XmlException(String.Format("The confidence score provided for link from source artifact {0} to target artifact {1} is in incorrect format '{2}'. File location: {3}", source_artifact_id, target_artifact_id, tmpValue, filepath));
                    }
                }

                answerSet.AddLink(source_artifact_id, target_artifact_id, confidence_score);
            }

            return answerSet;
        }
        /// <summary>
        /// Imports the answer set, validating it against the loaded source and target artifacts.
        /// The collection ids named in the answer set must match the ids of the given collections.
        /// A link that references an artifact id missing from its collection is reported through
        /// the logger and left out of the result. A link without a confidence_score element gets
        /// the default score 1.0.
        /// </summary>
        /// <param name="filepath">The filepath of the answer set XML file.</param>
        /// <param name="sourceArtifacts">The source artifacts.</param>
        /// <param name="sourceArtifactsFilePath">The source artifacts file path (used only in warnings).</param>
        /// <param name="targetArtifacts">The target artifacts.</param>
        /// <param name="targetArtifactsFilePath">The target artifacts file path (used only in warnings).</param>
        /// <param name="logger">The logger that receives a warning for every skipped link.</param>
        /// <param name="trimValues">if set to <c>true</c>, ids and the confidence score are trimmed before use.</param>
        /// <returns>Similarity matrix holding the links whose source and target artifacts both exist</returns>
        /// <exception cref="ArgumentException">A collection id in the answer set differs from the loaded collection's id.</exception>
        /// <exception cref="XmlException">
        /// A link lacks source_artifact_id or target_artifact_id, or its confidence score cannot be parsed.
        /// </exception>
        public static TLSimilarityMatrix ImportAnswerSet(string filepath, TLArtifactsCollection sourceArtifacts, string sourceArtifactsFilePath, TLArtifactsCollection targetArtifacts, string targetArtifactsFilePath, ComponentLogger logger, bool trimValues)
        {
            string friendlyAnswerSetFilename = System.IO.Path.GetFileName(filepath);
            string friendlySourceArtifactsFilename = System.IO.Path.GetFileName(sourceArtifactsFilePath); 
            string friendlyTargetArtifactsFilename = System.IO.Path.GetFileName(targetArtifactsFilePath);

            TLSimilarityMatrix answerSet = new TLSimilarityMatrix();

            XPathDocument doc = new XPathDocument(filepath);
            XPathNavigator nav = doc.CreateNavigator();

            //read collection info
            XPathNavigator iter = nav.SelectSingleNode("/answer_set/answer_info/source_artifacts_collection");
            string source_artifacts_collection_id = iter.Value;
            if (source_artifacts_collection_id.Equals(sourceArtifacts.CollectionId) == false)
            {
                throw new ArgumentException(String.Format("The answer set refers to source artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}", 
                                                    source_artifacts_collection_id, sourceArtifacts.CollectionId, filepath));
            }

            iter = nav.SelectSingleNode("/answer_set/answer_info/target_artifacts_collection");
            string target_artifacts_collection_id = iter.Value;
            if (target_artifacts_collection_id.Equals(targetArtifacts.CollectionId) == false)
            {
                throw new ArgumentException(String.Format("The answer set refers to target artifact collection with id '{0}', while loaded artifacts collection has different id '{1}'. Importing answer set from {2}", 
                                                    target_artifacts_collection_id, targetArtifacts.CollectionId, filepath));
            }

            XPathNodeIterator linksIterator = nav.Select("/answer_set/links/link");

            string source_artifact_id;
            string target_artifact_id;
            double confidence_score;
            while (linksIterator.MoveNext())
            {
                // Cleared when either end of the link is unknown. The rest of the link is still
                // parsed so that both warnings are logged and malformed XML is still reported.
                bool linkIsValid = true;

                // Parse Source Artifact Id
                iter = linksIterator.Current.SelectSingleNode("source_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The source_artifact_id has not been provided for the link. File location: {0}", filepath));
                }
                
                source_artifact_id = iter.Value;
                if (trimValues)
                {
                    source_artifact_id = source_artifact_id.Trim();
                }

                if (sourceArtifacts.ContainsKey(source_artifact_id) == false)
                {
                    logger.Warn(String.Format("The source artifact id '{0}' referenced in the answer set {1} has not been found in the source artifacts {2}. Therefore, this link has been removed in this experiment.", source_artifact_id, friendlyAnswerSetFilename, friendlySourceArtifactsFilename));
                    linkIsValid = false;
                }

                // Parse Target Artifact Id
                iter = linksIterator.Current.SelectSingleNode("target_artifact_id");
                if (iter == null)
                {
                    throw new XmlException(String.Format("The target_artifact_id has not been provided for the link. File location: {0}", filepath));
                }

                target_artifact_id = iter.Value;
                if (trimValues)
                {
                    target_artifact_id = target_artifact_id.Trim();
                }

                if (targetArtifacts.ContainsKey(target_artifact_id) == false)
                {
                    logger.Warn(String.Format("The target artifact id '{0}' referenced in the answer set {1} has not been found in the target artifacts {2}. Therefore, this link has been removed in this experiment.", target_artifact_id, friendlyAnswerSetFilename, friendlyTargetArtifactsFilename));
                    linkIsValid = false;
                }

                //Parse confidence score
                iter = linksIterator.Current.SelectSingleNode("confidence_score");
                if (iter == null)
                {
                    //if confidence score is not provided set it to default value 1
                    confidence_score = 1.0;
                }
                else
                {
                    string tmpValue = iter.Value;
                    if (trimValues)
                    {
                        tmpValue = tmpValue.Trim();
                    }

                    if (double.TryParse(tmpValue, out confidence_score) == false)
                    {
                        // Report both ends of the link and the value that failed to parse.
                        throw new XmlException(String.Format("The confidence score provided for link from source artifact {0} to target artifact {1} is in incorrect format '{2}'. File location: {3}", source_artifact_id, target_artifact_id, tmpValue, filepath));
                    }
                }

                // Honour the warnings above: a link with an unknown artifact is left out.
                if (linkIsValid)
                {
                    answerSet.AddLink(source_artifact_id, target_artifact_id, confidence_score);
                }
            }

            return answerSet;
        }
        /// <summary>
        /// Runs the text of every source artifact as a query against the dictionary index
        /// and records each returned result as a link in a new similarity matrix.
        /// </summary>
        /// <param name="sourceArtifacts">Artifacts whose text is used as the search queries</param>
        /// <param name="dict">Dictionary index that the queries are executed against</param>
        /// <param name="config">Configuration supplying the similarity metric for the searcher</param>
        /// <returns>Similarity matrix containing one link per search result</returns>
        /// <exception cref="ComponentException">Thrown when sourceArtifacts or dict is null</exception>
        private static TLSimilarityMatrix Process(TLArtifactsCollection sourceArtifacts, TLDictionaryIndex dict, TracerConfig config)
        {
            // Both inputs are mandatory; reject missing ones before doing any work.
            if (sourceArtifacts == null)
            {
                throw new ComponentException("Received null sourceArtifacts");
            }
            if (dict == null)
            {
                throw new ComponentException("Received null dictionaryIndex");
            }

            Searcher engine = new Searcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));
            TLSimilarityMatrix linkMatrix = new TLSimilarityMatrix();

            foreach (TLArtifact queryArtifact in sourceArtifacts.Values)
            {
                // The artifact text is the query; every hit becomes a link carrying the hit's ranking.
                List<Result> hits = engine.search(queryArtifact.Text, dict);
                foreach (Result hit in hits)
                {
                    linkMatrix.AddLink(queryArtifact.Id, hit.ArtifactId, hit.Ranking);
                }
            }

            return linkMatrix;
        }
Exemple #26
0
 /// <summary>
 /// Computes the cosine similarity (dot product divided by the product of the vector lengths)
 /// between every document in <paramref name="ids"/> and every document in <paramref name="tfidf"/>.
 /// A pair whose length product is zero is given a similarity of 0.
 /// </summary>
 /// <param name="ids">Boolean document vectors</param>
 /// <param name="tfidf">tf-idf weighted document vectors</param>
 /// <returns>Similarity matrix</returns>
 private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
 {
     TLSimilarityMatrix result = new TLSimilarityMatrix();
     // Equalize yields the two matrices in the order given: [0] corresponds to ids, [1] to tfidf.
     List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(ids, tfidf);

     for (int srcDoc = 0; srcDoc < ids.NumDocs; srcDoc++)
     {
         TLLinksList rowLinks = new TLLinksList();

         for (int tgtDoc = 0; tgtDoc < tfidf.NumDocs; tgtDoc++)
         {
             double dot = 0.0;
             double srcSumSquares = 0.0;
             double tgtSumSquares = 0.0;

             for (int term = 0; term < equalized[0].NumTerms; term++)
             {
                 double srcWeight = equalized[0][srcDoc, term];
                 double tgtWeight = equalized[1][tgtDoc, term];
                 dot += (srcWeight * tgtWeight);
                 srcSumSquares += Math.Pow(srcWeight, 2);
                 tgtSumSquares += Math.Pow(tgtWeight, 2);
             }

             // Guard against division by zero when either vector has no non-zero terms.
             double lengthProduct = Math.Sqrt(srcSumSquares) * Math.Sqrt(tgtSumSquares);
             double score = (lengthProduct == 0.0) ? 0.0 : dot / lengthProduct;
             rowLinks.Add(new TLSingleLink(ids.GetDocumentName(srcDoc), tfidf.GetDocumentName(tgtDoc), score));
         }

         // Links of one source document are sorted (list's default ordering) before being stored.
         rowLinks.Sort();
         foreach (TLSingleLink sorted in rowLinks)
         {
             result.AddLink(sorted.SourceArtifactId, sorted.TargetArtifactId, sorted.Score);
         }
     }

     return result;
 }
Exemple #27
0
 /// <summary>
 /// Imports script results: reads the script's output file and converts its
 /// comma-separated similarity values into a similarity matrix.
 /// Values are consumed in order, pairing the first source document with every
 /// target document, then the second source document, and so on.
 /// </summary>
 /// <param name="result">RScriptResults object (not read by this implementation)</param>
 /// <returns>Script results</returns>
 /// <exception cref="RDataException">
 /// Thrown when the output file is too short to contain data, or when the number of
 /// values does not equal (source documents * target documents).
 /// </exception>
 public override object ImportResults(RScriptResult result)
 {
     string rawdata;
     // 'using' guarantees the reader is released even if reading throws.
     using (TextReader rfile = new StreamReader(_outputFile))
     {
         rawdata = rfile.ReadToEnd();
     }

     // The two-character prefix is stripped below; report a missing one as a data error
     // instead of letting Remove() fail with an ArgumentOutOfRangeException.
     if (rawdata.Length < 2)
     {
         throw new RDataException("Results file is empty or truncated: " + _outputFile);
     }

     // Drops the first two characters and every ')' before splitting on commas.
     // NOTE(review): presumably this strips an R "c(...)" vector wrapper - confirm against the script output.
     string[] sims = rawdata.Remove(0, 2).Replace(")", String.Empty).Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

     int sourceCount = _source.DocMap.Count;
     int targetCount = _target.DocMap.Count;
     if (sims.Length != sourceCount * targetCount)
     {
         throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + (sourceCount * targetCount));
     }

     TLSimilarityMatrix matrix = new TLSimilarityMatrix();
     int src = 0;
     int tgt = 0;
     foreach (string sim in sims)
     {
         // The values are comma-separated, so the decimal separator cannot be ',';
         // parse with the invariant culture so the result does not depend on the machine locale.
         double score = Convert.ToDouble(sim.Trim(), System.Globalization.CultureInfo.InvariantCulture);
         matrix.AddLink(_source.DocMap[src], _target.DocMap[tgt], score);

         // Advance to the next target; after the last target move on to the next source.
         tgt++;
         if (tgt == targetCount)
         {
             tgt = 0;
             src++;
         }
     }
     return matrix;
 }
 /// <summary>
 /// Rebuilds the oracle and candidate similarity matrices used by the tests.
 /// </summary>
 public void RunBeforeEachTest()
 {
     // Oracle: the three targets marked with '*' are the correct links for source "A".
     oracle = new TLSimilarityMatrix();
     foreach (string relevantTarget in new string[] { "B*", "C*", "D*" })
     {
         oracle.AddLink("A", relevantTarget, 1);
     }

     // Candidate links are added out of score order. Sorted by score they would be:
     //   B*(10), E(9), F(8), C*(7), G(6), H(5), I(4), J(3), D*(2), K(1)
     string[] candidateTargets = new string[] { "G", "K", "B*", "E", "J", "F", "C*", "H", "D*", "I" };
     double[] candidateScores = new double[] { 6, 1, 10, 9, 3, 8, 7, 5, 2, 4 };

     sims = new TLSimilarityMatrix();
     for (int idx = 0; idx < candidateTargets.Length; idx++)
     {
         sims.AddLink("A", candidateTargets[idx], candidateScores[idx]);
     }
 }
Exemple #29
0
 /// <summary>
 /// Imports an oracle from a directory of files.
 /// Each file is a source artifact containing targets on each line:
 /// the file name (as returned by Path.GetFileName) is used as the source artifact id,
 /// and every line that is not empty or whitespace becomes a link to that target with score 1.
 /// Lines are stored as read, without trimming.
 /// </summary>
 /// <param name="directory">Path of the directory whose files are imported</param>
 /// <returns>Similarity matrix holding one link per non-blank line of every file</returns>
 public static TLSimilarityMatrix ImportDirectory(string directory)
 {
     TLSimilarityMatrix oracle = new TLSimilarityMatrix();
     foreach (string file in Directory.GetFiles(directory))
     {
         string id = Path.GetFileName(file);
         // Dispose the reader per file so file handles are not leaked while
         // iterating over the directory (or when reading throws).
         using (TextReader fReader = new StreamReader(file))
         {
             string line;
             while ((line = fReader.ReadLine()) != null)
             {
                 if (String.IsNullOrWhiteSpace(line))
                 {
                     continue;
                 }
                 oracle.AddLink(id, line, 1);
             }
         }
     }
     return oracle;
 }
Exemple #30
0
 /// <summary>
 /// Import script results: reads the whitespace-separated results table from the output file
 /// and converts it into a similarity matrix. The first line lists the source ids; each
 /// following non-blank line holds a target id followed by one similarity per source.
 /// Numeric ids in the table are 1-based positions into the strings loaded from the map file.
 /// </summary>
 /// <param name="result">RScriptResults object (not read by this implementation)</param>
 /// <returns>Script results</returns>
 /// <exception cref="System.IO.InvalidDataException">Thrown when the output file has no header line</exception>
 public override object ImportResults(RScriptResult result)
 {
     System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

     // index = id - 1
     string[] ids = Generics.ImportStrings(_mapFile);
     TLSimilarityMatrix matrix = new TLSimilarityMatrix();

     // 'using' guarantees the reader is released even when a line fails to parse.
     using (TextReader resultsMatrix = new StreamReader(_outputFile))
     {
         string header = resultsMatrix.ReadLine();
         if (header == null)
         {
             // An empty file previously surfaced as a NullReferenceException.
             throw new System.IO.InvalidDataException("Results file contains no header line: " + _outputFile);
         }
         string[] sources = header.Split();

         string line;
         while ((line = resultsMatrix.ReadLine()) != null)
         {
             if (String.IsNullOrWhiteSpace(line))
             {
                 continue;
             }
             // [0] target id, [x+] source sims index = x - 1
             string[] entries = line.Split();
             string entry = ids[Convert.ToInt32(entries[0], invariant) - 1];
             for (int i = 0; i < sources.Length; i++)
             {
                 // Parse with the invariant culture so '.'-decimal values are read the same
                 // way regardless of the machine locale.
                 matrix.AddLink(
                     ids[Convert.ToInt32(sources[i], invariant) - 1],
                     entry,
                     Convert.ToDouble(entries[i + 1], invariant));
             }
         }
     }
     return matrix;
 }