Example #1
0
        /// <summary>
        /// Imports the similarity scores written by the R script into a similarity matrix.
        /// </summary>
        /// <remarks>
        /// Reads <c>_outputFile</c>: the first line holds the source ids, and every following
        /// non-blank line holds a target id followed by one score per source. The numeric ids
        /// are 1-based indexes into the strings loaded from <c>_mapFile</c>.
        /// </remarks>
        /// <param name="result">RScriptResults object (not read by this implementation)</param>
        /// <returns>Script results as a <see cref="TLSimilarityMatrix"/></returns>
        public override object ImportResults(RScriptResult result)
        {
            // index = id - 1
            string[]           ids    = Generics.ImportStrings(_mapFile);
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            // The using block releases the file handle even when a malformed line
            // makes parsing throw part-way through the file.
            using (TextReader resultsMatrix = new StreamReader(_outputFile))
            {
                string[] sources = resultsMatrix.ReadLine().Split();
                string   line;

                while ((line = resultsMatrix.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    // [0] target id, [x+] source sims index = x - 1
                    string[] entries = line.Split();
                    string   entry   = ids[Convert.ToInt32(entries[0]) - 1];
                    for (int i = 0; i < sources.Length; i++)
                    {
                        matrix.AddLink(ids[Convert.ToInt32(sources[i]) - 1], entry, Convert.ToDouble(entries[i + 1]));
                    }
                }
            }
            return(matrix);
        }
Example #2
0
        /// <summary>
        /// Imports script results from <c>_outputFile</c> into a similarity matrix.
        /// </summary>
        /// <remarks>
        /// The file content, after dropping its first two characters and every ')', is read as a
        /// comma-separated list of scores ordered source by source: all targets for the first
        /// source, then all targets for the second, and so on.
        /// </remarks>
        /// <param name="result">RScriptResults object (not read by this implementation)</param>
        /// <returns>Script results as a <see cref="TLSimilarityMatrix"/></returns>
        /// <exception cref="RDataException">The number of scores is not (source count * target count).</exception>
        public override object ImportResults(RScriptResult result)
        {
            string rawdata;

            // Dispose the reader even if reading the file fails.
            using (TextReader rfile = new StreamReader(_outputFile))
            {
                rawdata = rfile.ReadToEnd();
            }

            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            string[] sims        = rawdata.Remove(0, 2).Replace(")", String.Empty).Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            int      sourceCount = _source.DocMap.Count;
            int      targetCount = _target.DocMap.Count;

            if (sims.Length != sourceCount * targetCount)
            {
                throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + (sourceCount * targetCount));
            }

            // Walk the flat list row by row: tgt cycles through the targets,
            // src advances each time a full row of targets has been consumed.
            int src = 0;
            int tgt = 0;

            foreach (string sim in sims)
            {
                matrix.AddLink(_source.DocMap[src], _target.DocMap[tgt], Convert.ToDouble(sim.Trim()));
                tgt++;
                if (tgt == targetCount)
                {
                    tgt = 0;
                    src++;
                }
            }
            return(matrix);
        }
        /// <summary>
        /// Writes the similarity matrix to a file at the given absolute path,
        /// appending ".csv" to the path when it does not already end with it.
        /// </summary>
        /// <param name="similarityMatrix">Matrix to write; must not be null</param>
        /// <param name="outputPath">Absolute output path; must not be null</param>
        private static void CreateCSVReport(TLSimilarityMatrix similarityMatrix, string outputPath)
        {
            // Validate the inputs up front.
            if (similarityMatrix == null)
            {
                throw new ComponentException("Received similarity matrix is null!");
            }
            if (outputPath == null)
            {
                throw new ComponentException("Output path cannot be null.");
            }
            if (!System.IO.Path.IsPathRooted(outputPath))
            {
                throw new ComponentException(String.Format("Absolute output path is required. Given path is '{0}'", outputPath));
            }

            // Make sure the file carries the csv extension.
            bool hasCsvExtension = outputPath.EndsWith(".csv", StringComparison.CurrentCultureIgnoreCase);
            if (!hasCsvExtension)
            {
                outputPath += ".csv";
            }

            using (System.IO.TextWriter csvWriter = new StreamWriter(outputPath))
            {
                ReadSimilarityMatrixToFile(similarityMatrix, csvWriter);
                csvWriter.Flush();
                csvWriter.Close();
            }
        }
Example #4
0
        /// <summary>
        /// Computes the cosine similarity between the given document pairs in the matrix
        /// </summary>
        /// <param name="matrix">Term-by-document matrix</param>
        /// <param name="sourceIDs">Collection of source artifacts ids</param>
        /// <param name="targetIDs">Collection of target artifacts ids (enumerated once per source id)</param>
        /// <returns>
        /// Similarity matrix with one link per (source, target) pair; a pair whose
        /// length product is zero gets a score of 0.0.
        /// </returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix matrix, IEnumerable <string> sourceIDs, IEnumerable <string> targetIDs)
        {
            TLSimilarityMatrix sims = new TLSimilarityMatrix();

            foreach (string sourceID in sourceIDs)
            {
                double[] sourceDoc = matrix.GetDocument(sourceID);
                // The source length does not depend on the target, so compute it once per source.
                double sourceLength = ComputeLength(sourceDoc);

                foreach (string targetID in targetIDs)
                {
                    // compute cosine similarity between source and target
                    double[] targetDoc     = matrix.GetDocument(targetID);
                    double   lengthProduct = sourceLength * ComputeLength(targetDoc);

                    // Guard against division by zero for empty documents.
                    double score = (lengthProduct == 0.0)
                                   ? 0.0
                                   : ComputeDotProduct(sourceDoc, targetDoc) / lengthProduct;
                    sims.AddLink(sourceID, targetID, score);
                }
            }
            return(sims);
        }
Example #5
0
        /// <summary>
        /// Imports a ranklist from <paramref name="input"/>, exports it to
        /// <paramref name="output"/> and returns the imported matrix.
        /// </summary>
        /// <param name="info">Not used by this method</param>
        /// <param name="input">Path passed to the ranklist importer</param>
        /// <param name="output">Path passed to the similarities exporter</param>
        /// <returns>The imported similarity matrix</returns>
        private static TLSimilarityMatrix ConvertRocco(ref Info info, string input, string output)
        {
            TLSimilarityMatrix ranklist = RanklistImporter.Import(input);
            Similarities.Export(ranklist, output);

            return ranklist;
        }
Example #6
0
        /// <summary>
        /// Exports an answer matrix to a file.
        /// Each line holds a source id followed by the space-separated ids of its
        /// target artifacts above the threshold.
        /// </summary>
        /// <param name="answerMatrix">Answer matrix</param>
        /// <param name="filename">Output file path</param>
        /// <exception cref="DevelopmentKitException">Any failure while writing; the original exception is the inner exception.</exception>
        public static void Export(TLSimilarityMatrix answerMatrix, string filename)
        {
            try
            {
                // using flushes and closes the writer on every path; a failure while
                // closing is still reported through the catch below.
                using (TextWriter tw = new StreamWriter(filename))
                {
                    foreach (string sourceID in answerMatrix.SourceArtifactsIds)
                    {
                        tw.Write(sourceID);
                        foreach (string targetID in answerMatrix.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(sourceID))
                        {
                            tw.Write(" " + targetID);
                        }
                        tw.WriteLine();
                    }
                    tw.Flush();
                }
            }
            catch (Exception e)
            {
                throw new DevelopmentKitException("There was an exception writing to file (" + filename + ")", e);
            }
        }
        /// <summary>
        /// Writes the answer set as an indented XML document rooted at an
        /// "answer_set" element and traces the output location afterwards.
        /// </summary>
        /// <param name="answerSet">Answer matrix to export; must not be null</param>
        /// <param name="sourceId">Source id handed to WriteAnswerSetInfo</param>
        /// <param name="targetId">Target id handed to WriteAnswerSetInfo</param>
        /// <param name="outputPath">Path of the XML file to create</param>
        public static void Export(TLSimilarityMatrix answerSet, string sourceId, string targetId, string outputPath)
        {
            if (answerSet == null)
            {
                throw new TraceLabSDK.ComponentException("Received null answer similarity matrix");
            }

            System.Xml.XmlWriterSettings xmlSettings = new System.Xml.XmlWriterSettings
            {
                Indent          = true,
                CloseOutput     = true,
                CheckCharacters = true
            };

            // Create the file and emit <answer_set> with its info and links.
            using (System.Xml.XmlWriter xml = System.Xml.XmlWriter.Create(outputPath, xmlSettings))
            {
                xml.WriteStartDocument();
                xml.WriteStartElement("answer_set");

                WriteAnswerSetInfo(xml, sourceId, targetId);
                WriteLinks(answerSet, xml);

                xml.WriteEndElement(); // answer_set
                xml.WriteEndDocument();
                xml.Close();
            }

            System.Diagnostics.Trace.WriteLine("File created , you can find the file " + outputPath);
        }
Example #8
0
        /// <summary>
        /// Computes overall precision, recall, average precision and mean average precision
        /// over the candidate links retrieved at the given recall level. Each output list
        /// receives a single entry keyed "#TOTAL".
        /// </summary>
        /// <param name="sims">Candidate similarities</param>
        /// <param name="oracle">Answer matrix</param>
        /// <param name="level">Recall level at which links are taken</param>
        /// <param name="precision">Receives the precision entry</param>
        /// <param name="recall">Receives the recall entry</param>
        /// <param name="avgPrecision">Receives the average precision entry</param>
        /// <param name="meanAvgPrecision">Receives the mean average precision entry</param>
        public static void ComputeMetrics(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel level,
                                          out TLKeyValuePairsList precision, out TLKeyValuePairsList recall, out TLKeyValuePairsList avgPrecision, out TLKeyValuePairsList meanAvgPrecision)
        {
            TLLinksList retrieved    = MetricsUtil.GetLinksAtRecall(sims, oracle, level);
            int         hits         = 0;
            int         seen         = 0;
            double      precisionSum = 0.0;

            foreach (TLSingleLink candidate in retrieved)
            {
                seen++;
                if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                {
                    continue;
                }
                // Correct link: accumulate the precision at this rank.
                hits++;
                precisionSum += hits / (double)seen;
            }

            // temporary
            precision = new TLKeyValuePairsList();
            double precisionValue = hits / Convert.ToDouble(retrieved.Count);
            precision.Add(new KeyValuePair <string, double>("#TOTAL", precisionValue));

            recall = new TLKeyValuePairsList();
            double recallValue = Math.Ceiling(oracle.Count * RecallLevelUtil.RecallValue(level)) / oracle.Count;
            recall.Add(new KeyValuePair <string, double>("#TOTAL", recallValue));

            avgPrecision = new TLKeyValuePairsList();
            double averagePrecisionValue = precisionSum / oracle.Count;
            avgPrecision.Add(new KeyValuePair <string, double>("#TOTAL", averagePrecisionValue));

            meanAvgPrecision = new TLKeyValuePairsList();
            double mapValue = MeanAveragePrecision.Compute(Similarities.CreateMatrix(retrieved), oracle);
            meanAvgPrecision.Add(new KeyValuePair <string, double>("#TOTAL", mapValue));
        }
        /// <summary>
        /// Loads "Matrix" from the workspace, removes the configured bottom percentage
        /// of it and stores the rebuilt matrix as "PrunedMatrix".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix input = (TLSimilarityMatrix)Workspace.Load("Matrix");

            var remainingLinks = TLSimilarityMatrixUtil.RemoveBottomPercentage(input, _config.Percentage);
            TLSimilarityMatrix result = TLSimilarityMatrixUtil.CreateMatrix(remainingLinks);

            Workspace.Store("PrunedMatrix", result);
        }
Example #10
0
        /// <summary>
        /// Names the "matrix" workspace input with the "name" input, appends it to
        /// "listOfMatrices" and stores the collection back into the workspace.
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatricesCollection collection = (TLSimilarityMatricesCollection)Workspace.Load("listOfMatrices");
            if (collection == null)
            {
                throw new ComponentException("The 'listOfMatrices' is null or has not been found in the Workspace.");
            }

            TLSimilarityMatrix incoming = (TLSimilarityMatrix)Workspace.Load("matrix");
            if (incoming == null)
            {
                throw new ComponentException("The 'matrix' input is null or has not been found in the Workspace.");
            }

            string label = (string)Workspace.Load("name");
            if (label == null)
            {
                throw new ComponentException("The 'name' for matrix is null or has not been found in the Workspace. Please, provide the name for the matrix.");
            }

            // All inputs are present: label the matrix and append it.
            incoming.Name = label;
            collection.Add(incoming);

            Workspace.Store("listOfMatrices", collection);
        }
Example #11
0
        // Runs the component against an empty dictionary index and expects a
        // similarity matrix that exists but contains no links.
        public void EmptyDictionaryIndexTest()
        {
            TLArtifactsCollection sources = new TLArtifactsCollection();
            sources.Add(new TLArtifact("id", "text"));

            TLArtifactsCollection targets = new TLArtifactsCollection();
            targets.Add(new TLArtifact("id", "text"));

            TLDictionaryIndex emptyIndex = new TLDictionaryIndex();

            Workspace.Store("sourceArtifacts", sources);
            Workspace.Store("targetArtifacts", targets);
            Workspace.Store("dictionaryIndex", emptyIndex);

            TracerConfig config = (TracerConfig)TestComponent.Configuration;
            config.SimilarityMetric = SimilarityMetricMethod.SimpleMatching;

            TestComponent.Compute();

            TLSimilarityMatrix simMat = (TLSimilarityMatrix)Workspace.Load("similarityMatrix");

            bool createdButEmpty = simMat != null && simMat.Count == 0;
            if (!createdButEmpty)
            {
                Assert.Fail("Similarity Matrix should still be created but have nothing in it");
            }
        }
Example #12
0
        // Feeds three source and four target artifacts plus a one-term dictionary to the
        // component. The test is unfinished: it always ends in Assert.Fail().
        public void TestTracingOfComponent()
        {
            TLArtifactsCollection sources = new TLArtifactsCollection();
            TLArtifactsCollection targets = new TLArtifactsCollection();
            TLDictionaryIndex     index   = new TLDictionaryIndex();

            // TODO: add inputs that matter
            sources.Add(new TLArtifact("id1", "first text"));
            sources.Add(new TLArtifact("id2", "words to do stuff with"));
            sources.Add(new TLArtifact("id3", "some more text"));

            targets.Add(new TLArtifact("id1", "hello world"));
            targets.Add(new TLArtifact("id2", "very very random yes indeed"));
            targets.Add(new TLArtifact("id3", "yep"));
            targets.Add(new TLArtifact("id4", "chickens in the coop"));

            index.AddTermEntry("term", 3, 3, 0.2);

            Workspace.Store("sourceArtifacts", sources);
            Workspace.Store("targetArtifacts", targets);
            Workspace.Store("dictionaryIndex", index);

            TracerConfig config = (TracerConfig)TestComponent.Configuration;
            config.SimilarityMetric = SimilarityMetricMethod.SimpleMatching;

            TestComponent.Compute();

            // Loaded (and cast) but not inspected yet.
            TLSimilarityMatrix simMat = (TLSimilarityMatrix)Workspace.Load("similarityMatrix");

            // TODO: add tests to make sure the output is correctly formatted
            Assert.Fail();
        }
        /// <summary>
        /// Computes precision, recall and average precision for every source artifact of the
        /// oracle, using only the candidate links retrieved at the given recall level, and adds
        /// the average of the per-source average precisions under the key "#TOTAL".
        /// </summary>
        /// <param name="sims">Candidate similarities</param>
        /// <param name="oracle">Answer matrix</param>
        /// <param name="recall">Recall level at which candidate links are taken</param>
        /// <returns>The per-source metric data</returns>
        public static DataSetPairs Compute(TLSimilarityMatrix sims, TLSimilarityMatrix oracle, RecallLevel recall)
        {
            TLSimilarityMatrix candidates = Similarities.CreateMatrix(MetricsUtil.GetLinksAtRecall(sims, oracle, recall));
            candidates.Threshold = double.MinValue;

            DataSetPairs result = new DataSetPairs();

            foreach (string sourceId in oracle.SourceArtifactsIds)
            {
                TLLinksList ranked = candidates.GetLinksAboveThresholdForSourceArtifact(sourceId);
                ranked.Sort();

                int    relevant     = oracle.GetLinksAboveThresholdForSourceArtifact(sourceId).Count;
                int    hits         = 0;
                int    seen         = 0;
                double precisionSum = 0.0;

                foreach (TLSingleLink candidate in ranked)
                {
                    seen++;
                    if (!oracle.IsLinkAboveThreshold(candidate.SourceArtifactId, candidate.TargetArtifactId))
                    {
                        continue;
                    }
                    // Correct link: accumulate the precision at this rank.
                    hits++;
                    precisionSum += hits / (double)seen;
                }

                double precisionValue   = hits / Convert.ToDouble(ranked.Count);
                double recallValue      = Convert.ToDouble(hits) / relevant;
                double averagePrecision = precisionSum / relevant;

                result.PrecisionData.Add(new KeyValuePair <string, double>(sourceId, precisionValue));
                result.RecallData.Add(new KeyValuePair <string, double>(sourceId, recallValue));
                result.AveragePrecisionData.Add(new KeyValuePair <string, double>(sourceId, averagePrecision));
            }

            double meanAveragePrecision = DataSetPairsCollection.CalculateAverage(result.AveragePrecisionData);
            result.MeanAveragePrecisionData.Add(new KeyValuePair <string, double>("#TOTAL", meanAveragePrecision));

            return result;
        }
Example #14
0
        /// <summary>
        /// Imports similarities from the configured directory using the "DocumentMap"
        /// workspace entry and stores them as "Similarities".
        /// </summary>
        public override void Compute()
        {
            List <string> documentMap = (List <string>)Workspace.Load("DocumentMap");

            TLSimilarityMatrix imported = Similarities.Import(_config.Directory.Absolute, documentMap);
            Workspace.Store("Similarities", imported);
        }
Example #15
0
        /// <summary>
        /// Computes the score for the given current results: the average, over all datasets,
        /// of the deltas between the baseline and current metric values.
        /// </summary>
        /// <param name="baselineMatrices">The baseline matrices, indexed by dataset name.</param>
        /// <param name="currentResultsMatrices">The current results, indexed by dataset name.</param>
        /// <param name="datasets">The datasets to score.</param>
        /// <param name="config">Configuration used to select the metric computation.</param>
        /// <returns>
        /// The sum of all deltas divided by the number of deltas. When no delta is produced
        /// (for example <paramref name="datasets"/> is empty) this is 0.0 / 0.0, i.e. NaN.
        /// </returns>
        public static double ComputeScore(TLSimilarityMatricesCollection baselineMatrices, TLSimilarityMatricesCollection currentResultsMatrices,
                                          TLDatasetsList datasets, MetricComputationComponentConfig config)
        {
            double sum    = 0;
            double counts = 0;

            IMetricComputation metricComputation = GetMetricComputation(config);

            foreach (TLDataset dataset in datasets)
            {
                TLSimilarityMatrix baseline = baselineMatrices[dataset.Name];
                TLSimilarityMatrix current  = currentResultsMatrices[dataset.Name];

                //score is computed based on delta between two techniques from metric computation
                SortedDictionary <string, double> baselineValues = metricComputation.Calculate(baseline, dataset);
                SortedDictionary <string, double> currentValues  = metricComputation.Calculate(current, dataset);

                var deltas = ScoreComputationHelper.Delta(baselineValues, currentValues);

                // accumulate the deltas; their overall average is the score
                foreach (double delta in deltas.Values)
                {
                    sum += delta;
                }

                counts += deltas.Count;
            }

            return(sum / counts);
        }
Example #16
0
        /// <summary>
        /// Imports a file in the form (each line):
        /// SOURCE TARGET SCORE
        /// Blank lines are skipped.
        /// </summary>
        /// <param name="filename">Similarities file</param>
        /// <returns>Similarity matrix</returns>
        /// <exception cref="InvalidDataException">A line has fewer than three whitespace-separated fields.</exception>
        public static TLSimilarityMatrix Import(String filename)
        {
            TLSimilarityMatrix answer = new TLSimilarityMatrix();

            // using closes the file on every exit path, including exceptions that are not
            // translated below (e.g. a score that cannot be parsed as a number).
            using (StreamReader file = new StreamReader(filename))
            {
                String line;
                int    num = 0;

                while ((line = file.ReadLine()) != null)
                {
                    num++;
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    try
                    {
                        String[] artifacts = line.Split();
                        String   source    = artifacts[0];
                        String   target    = artifacts[1];
                        double   score     = Convert.ToDouble(artifacts[2]);
                        answer.AddLink(source, target, score);
                    }
                    catch (IndexOutOfRangeException e)
                    {
                        throw new InvalidDataException("Invalid data format on line " + num + " of file:" + filename, e);
                    }
                }
            }
            return(answer);
        }
        /// <summary>
        /// Exports the "SimilarityMatrix" workspace entry as CSV to the configured
        /// path and logs where it was saved.
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix toExport = (TLSimilarityMatrix)Workspace.Load("SimilarityMatrix");
            DevelopmentKit.IO.Similarities.ExportCSV(toExport, Config.Path.Absolute);

            string message = String.Format("Matrix has been saved into csv file '{0}'", Config.Path.Absolute);
            Logger.Info(message);
        }
Example #18
0
        /// <summary>
        /// Runs the overall metrics computation on the "Similarities" and "Oracle"
        /// workspace entries and stores the outcome as "DataSetPairs".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix candidates = (TLSimilarityMatrix)Workspace.Load("Similarities");
            TLSimilarityMatrix answers    = (TLSimilarityMatrix)Workspace.Load("Oracle");

            var metrics = OverallMetricsComputation.ComputeAll(candidates, answers);
            Workspace.Store("DataSetPairs", metrics);
        }
        // Rebuilds the shared fixtures: an oracle with three correct links for source "A"
        // (the starred targets) and a candidate matrix holding ten links added in
        // deliberately unsorted order.
        public void RunBeforeEachTest()
        {
            oracle = new TLSimilarityMatrix();
            foreach (string correctTarget in new string[] { "B*", "C*", "D*" })
            {
                oracle.AddLink("A", correctTarget, 1);
            }

            sims = new TLSimilarityMatrix();

            /* Sorted order:
             * sims.AddLink("A", "B*", 10);
             * sims.AddLink("A", "E",  9);
             * sims.AddLink("A", "F",  8);
             * sims.AddLink("A", "C*", 7);
             * sims.AddLink("A", "G",  6);
             * sims.AddLink("A", "H",  5);
             * sims.AddLink("A", "I",  4);
             * sims.AddLink("A", "J",  3);
             * sims.AddLink("A", "D*", 2);
             * sims.AddLink("A", "K",  1);
             */
            // Insertion order, paired index by index with the scores below.
            string[] targets = new string[] { "G", "K", "B*", "E", "J", "F", "C*", "H", "D*", "I" };
            int[]    scores  = new int[]    {  6,   1,   10,   9,   3,   8,   7,    5,   2,    4 };

            for (int i = 0; i < targets.Length; i++)
            {
                sims.AddLink("A", targets[i], scores[i]);
            }
        }
Example #20
0
        /// <summary>
        /// Searches the dictionary index with the text of every source artifact and records
        /// each returned result as a link from that source artifact, scored with the result's ranking.
        /// </summary>
        /// <param name="sourceArtifacts">Source artifacts whose text is used as query; must not be null</param>
        /// <param name="dict">Dictionary index to search; must not be null</param>
        /// <param name="config">Configuration providing the similarity metric</param>
        /// <returns>Similarity matrix holding one link per search result</returns>
        private static TLSimilarityMatrix Process(TLArtifactsCollection sourceArtifacts, TLDictionaryIndex dict, TracerConfig config)
        {
            if (sourceArtifacts == null)
            {
                throw new ComponentException("Received null sourceArtifacts");
            }
            if (dict == null)
            {
                throw new ComponentException("Received null dictionaryIndex");
            }

            TLSimilarityMatrix links  = new TLSimilarityMatrix();
            Searcher           engine = new Searcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

            // One search per source artifact; every hit becomes a link in the matrix.
            foreach (TLArtifact source in sourceArtifacts.Values)
            {
                List <Result> hits = engine.search(source.Text, dict);

                foreach (Result hit in hits)
                {
                    links.AddLink(source.Id, hit.ArtifactId, hit.Ranking);
                }
            }

            return links;
        }
        /// <summary>
        /// Runs the effectiveness "best" and "all" measure computations on the "CandidateMatrix"
        /// and "AnswerMatrix" workspace entries, each only when enabled in the configuration,
        /// and registers every computed measure with the results controller.
        /// </summary>
        public override void Compute()
        {
            Logger.Trace("Starting metrics computation: " + _config.TechniqueName);
            TLSimilarityMatrix candidate = (TLSimilarityMatrix)Workspace.Load("CandidateMatrix");
            TLSimilarityMatrix answer    = (TLSimilarityMatrix)Workspace.Load("AnswerMatrix");
            // Constructed here but not used further in this method.
            TLExperimentResults exResults = new TLExperimentResults(_config.TechniqueName);

            // --- Effectiveness best measure ---
            if (!_config.EffectivenessBestMeasure)
            {
                Logger.Trace("Skipped effectiveness best measure computation.");
            }
            else
            {
                Logger.Trace("Computing effectiveness best measure...");
                IMetricComputation bestMeasure = new EffectivenessBestMeasureComputation(candidate, answer);
                bestMeasure.Compute();
                ResultsController.Instance.AddResult(_config.TechniqueName, _config.DatasetName, bestMeasure);
            }

            // --- Effectiveness all measure ---
            if (!_config.EffectivenessAllMeasure)
            {
                Logger.Trace("Skipped effectiveness all measure computation.");
            }
            else
            {
                Logger.Trace("Computing effectiveness all measure...");
                IMetricComputation allMeasure = new EffectivenessAllMeasureComputation(candidate, answer);
                allMeasure.Compute();
                ResultsController.Instance.AddResult(_config.TechniqueName, _config.DatasetName, allMeasure);
            }
        }
        /// <summary>
        /// Resets the metrics and similarities of the current dataset and refills them from the
        /// workspace: for each IR model ("VSM", "JS") its base similarities and metrics, followed
        /// by those of each structural model ("OCSTI", "UDCSTI") combined with it. Loaded matrices
        /// and metric pairs are renamed after their model before being added.
        /// </summary>
        public override void Compute()
        {
            List <CSMR13DataSet> ListOfDatasets = (List <CSMR13DataSet>)Workspace.Load("ListOfDatasets");
            int CurrentDataset = (int)Workspace.Load("CurrentDataset");

            CSMR13DataSet dataset = ListOfDatasets[CurrentDataset];
            dataset.Metrics      = new DataSetPairsCollection();
            dataset.Similarities = new List <TLSimilarityMatrix>();

            string[] irModels         = { "VSM", "JS" };
            string[] structuralModels = { "OCSTI", "UDCSTI" };

            foreach (string irModel in irModels)
            {
                // Base IR technique.
                TLSimilarityMatrix baseSims = (TLSimilarityMatrix)Workspace.Load(irModel + "_Similarities");
                baseSims.Name = irModel;
                dataset.Similarities.Add(baseSims);

                DataSetPairsCollection baseMetrics = (DataSetPairsCollection)Workspace.Load(irModel + "_Metrics");
                foreach (DataSetPairs basePairs in baseMetrics)
                {
                    basePairs.Name = irModel + " @" + basePairs.Name;
                    dataset.Metrics.Add(basePairs);
                }

                // Structural variants built on top of the IR technique.
                foreach (string structuralModel in structuralModels)
                {
                    string combinedKey = irModel + "_" + structuralModel;

                    TLSimilarityMatrix structuralSims = (TLSimilarityMatrix)Workspace.Load(combinedKey);
                    structuralSims.Name = combinedKey;
                    dataset.Similarities.Add(structuralSims);

                    DataSetPairsCollection structuralMetrics = (DataSetPairsCollection)Workspace.Load(combinedKey + "_Metrics");
                    foreach (DataSetPairs structuralPairs in structuralMetrics)
                    {
                        structuralPairs.Name = irModel + " " + structuralModel + " @" + structuralPairs.Name;
                        dataset.Metrics.Add(structuralPairs);
                    }
                }
            }

            Workspace.Store("ListOfDatasets", ListOfDatasets);
        }
Example #23
0
        /// <summary>
        /// Returns links for the desired recall level.
        /// The sorted candidate links are taken from the front until the number of correct
        /// links collected reaches (answer count * level) or the candidates run out.
        /// </summary>
        /// <param name="matrix">Candidate matrix</param>
        /// <param name="answerMatrix">Answer matrix</param>
        /// <param name="level">Desired recall level, greater than 0 and at most 1</param>
        /// <returns>List of links at desired recall</returns>
        /// <exception cref="DevelopmentKitException">The level is outside (0, 1].</exception>
        public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
        {
            if (level <= 0.0 || level > 1.0)
            {
                throw new DevelopmentKitException("Recall level must be between 0 and 1.");
            }
            double      totalCorrect = answerMatrix.Count * level;
            int         numCorrect   = 0;
            TLLinksList links        = matrix.AllLinks;

            links.Sort();
            TLLinksList newLinks = new TLLinksList();

            // Walk the sorted list by index instead of repeatedly removing the head element,
            // which shifts the whole remainder on every iteration (quadratic overall).
            // NOTE(review): assumes AllLinks hands back a list that is not shared with the
            // matrix, so nobody observed the removals the old loop performed - confirm.
            for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
            {
                TLSingleLink link = links[i];
                if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
                {
                    numCorrect++;
                }
                newLinks.Add(link);
            }
            return(newLinks);
        }
Example #24
0
        // Round-trips a ten-link matrix through WriteData/ReadData over an in-memory stream
        // and checks that the link count and all source artifact ids survive.
        public void SimilarityMatrixRawSerializationTest()
        {
            string[] sources = new string[] { "source1", "source2", "source3", "source4", "source5", "source6", "source7", "source8", "source9", "source10" };
            string[] targets = new string[] { "target1", "target2", "target3", "target4", "target5", "target6", "target7", "target8", "target9", "target10" };

            TLSimilarityMatrix original = new TLSimilarityMatrix();
            for (int index = 0; index < sources.Length; index++)
            {
                original.AddLink(sources[index], targets[index], (double)index);
            }

            // Writer and reader share one in-memory buffer.
            MemoryStream buffer = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(buffer);
            BinaryReader reader = new BinaryReader(buffer);

            original.WriteData(writer);

            // Rewind so that reading starts at the first written byte.
            buffer.Position = 0;

            TLSimilarityMatrix restored = new TLSimilarityMatrix();
            restored.ReadData(reader);

            Assert.AreEqual(original.Count, restored.Count);

            StringHashSet expectedSources = original.SourceArtifactsIds;
            StringHashSet actualSources   = restored.SourceArtifactsIds;

            foreach (string sourceId in expectedSources)
            {
                Assert.IsTrue(actualSources.Contains(sourceId));
            }
        }
        /// <summary>
        /// Computes per-source-artifact metrics for the "Similarities" and "Oracle" workspace
        /// entries at the configured recall and stores the outcome as "DataSetPairs".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix candidates = (TLSimilarityMatrix)Workspace.Load("Similarities");
            TLSimilarityMatrix answers    = (TLSimilarityMatrix)Workspace.Load("Oracle");

            var metrics = MetricsPerSourceArtifact.Compute(candidates, answers, _config.Recall);
            Workspace.Store("DataSetPairs", metrics);
        }
Example #26
0
        /// <summary>
        /// Computes cosine similarities between two TermDocumentMatrices.
        /// Cosine similarity is defined as (dot product) / (length * length)
        /// </summary>
        /// <remarks>
        /// Both matrices are equalized first. Every document of <paramref name="m1"/> is paired
        /// with every document of <paramref name="m2"/>; pairs whose length product is zero score
        /// 0.0. The links of each source document are sorted before being added to the result.
        /// </remarks>
        /// <param name="m1">Binary document matrix</param>
        /// <param name="m2">tf-idf weighted document matrix</param>
        /// <returns>Similarity matrix</returns>
        public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
        {
            TLSimilarityMatrix        sims     = new TLSimilarityMatrix();
            List <TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);

            for (int i = 0; i < m1.NumDocs; i++)
            {
                // Source-side values do not depend on j, so fetch them once per source document
                // instead of once (or twice) per pair.
                var    sourceDoc    = matrices[0].GetDocument(i);
                var    sourceName   = m1.GetDocumentName(i);
                double sourceLength = ComputeLength(sourceDoc);

                TLLinksList links = new TLLinksList();
                for (int j = 0; j < m2.NumDocs; j++)
                {
                    var    targetDoc     = matrices[1].GetDocument(j);
                    double lengthProduct = sourceLength * ComputeLength(targetDoc);

                    // Guard against division by zero for empty documents.
                    double score = (lengthProduct == 0.0)
                                   ? 0.0
                                   : ComputeDotProduct(sourceDoc, targetDoc) / lengthProduct;
                    links.Add(new TLSingleLink(sourceName, m2.GetDocumentName(j), score));
                }
                links.Sort();
                foreach (TLSingleLink link in links)
                {
                    sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
                }
            }
            return(sims);
        }
        /// <summary>
        /// Loads "Matrix" from the workspace, keeps its top N links (N from the
        /// configuration) and stores the rebuilt matrix as "PrunedMatrix".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix input = (TLSimilarityMatrix)Workspace.Load("Matrix");

            var topLinks = TLSimilarityMatrixUtil.GetTopNLinks(input, _config.N);
            TLSimilarityMatrix result = TLSimilarityMatrixUtil.CreateMatrix(topLinks);

            Workspace.Store("PrunedMatrix", result);
        }
Example #28
0
        /// <summary>
        /// Computes the delta value for an individual artifact: half the spread between the
        /// highest and lowest link score of the source artifact. Values below 0.05 are
        /// replaced by delta^4 / 4.
        /// </summary>
        /// <remarks>
        /// Side effect: sets <c>matrix.Threshold</c> to <c>double.MinValue</c>.
        /// A source artifact without any link yields 0.
        /// </remarks>
        /// <param name="matrix">Similarities</param>
        /// <param name="source">Source artifact id</param>
        /// <returns>delta</returns>
        public static double ComputeForSourceArtifact(TLSimilarityMatrix matrix, string source)
        {
            matrix.Threshold = double.MinValue;
            double min      = Double.MaxValue;
            double max      = Double.MinValue;
            bool   hasLinks = false;

            foreach (TLSingleLink link in matrix.GetLinksAboveThresholdForSourceArtifact(source))
            {
                hasLinks = true;
                if (link.Score < min)
                {
                    min = link.Score;
                }
                if (link.Score > max)
                {
                    max = link.Score;
                }
            }

            // Without links min/max still hold their sentinels; (max - min) would overflow to
            // -Infinity and the adjustment below would turn it into +Infinity. Treat the
            // spread of an empty set as zero instead.
            double delta = hasLinks ? (max - min) / 2.0 : 0.0;

            // according to R scripts
            if (delta < 0.05)
            {
                delta = Math.Pow(delta, 4) / 4;
            }
            return(delta);
        }
Example #29
0
 /// <summary>
 /// Writes a TLSimilarityMatrix as CSV, including an extra column that marks link correctness
 /// 0 - incorrect link
 /// 1 - correct link
 /// ".csv" is appended to the path when it does not already end with it.
 /// </summary>
 /// <param name="similarityMatrix">Candidate Matrix; must not be null</param>
 /// <param name="answerMatrix">Answer Matrix; must not be null</param>
 /// <param name="outputPath">Absolute output file path; must not be null</param>
 public static void ExportCSVWithCorrectness(TLSimilarityMatrix similarityMatrix, TLSimilarityMatrix answerMatrix, string outputPath)
 {
     // Validate the inputs up front.
     if (similarityMatrix == null)
     {
         throw new DevelopmentKitException("Received similarity matrix is null!");
     }
     if (answerMatrix == null)
     {
         throw new DevelopmentKitException("Received answer similarity matrix is null!");
     }
     if (outputPath == null)
     {
         throw new DevelopmentKitException("Output path cannot be null.");
     }
     if (!System.IO.Path.IsPathRooted(outputPath))
     {
         throw new DevelopmentKitException(String.Format("Absolute output path is required. Given path is '{0}'", outputPath));
     }

     // Make sure the file carries the csv extension.
     bool hasCsvExtension = outputPath.EndsWith(".csv", StringComparison.CurrentCultureIgnoreCase);
     if (!hasCsvExtension)
     {
         outputPath += ".csv";
     }

     using (System.IO.TextWriter csvWriter = new StreamWriter(outputPath))
     {
         WriteMatrixCSVWithCorrectness(similarityMatrix, answerMatrix, csvWriter);
         csvWriter.Flush();
         csvWriter.Close();
     }
 }
Example #30
0
        /// <summary>
        /// Runs OCSTI on the "Similarities" and "StructuralRelationships" workspace
        /// entries and stores the outcome as "O-CSTI_Similarities".
        /// </summary>
        public override void Compute()
        {
            TLSimilarityMatrix candidates = (TLSimilarityMatrix)Workspace.Load("Similarities");
            TLSimilarityMatrix structure  = (TLSimilarityMatrix)Workspace.Load("StructuralRelationships");

            var adjusted = OCSTI.Compute(candidates, structure);
            Workspace.Store("O-CSTI_Similarities", adjusted);
        }
        /// <summary>
        /// Reads the data. (allows faster custom serialization for better performance in TraceLab)
        /// The stream layout is: data version, name, then three optional members (source
        /// artifacts, target artifacts, answer set), each preceded by a presence flag.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <exception cref="InvalidOperationException">The stored data version differs from <c>TLDataset.version</c>.</exception>
        public void ReadData(System.IO.BinaryReader reader)
        {
            // Refuse data written with a different version.
            if (reader.ReadInt32() != TLDataset.version)
            {
                throw new InvalidOperationException("Binary reader did not read correct data version. Data corrupted. Potentially IRawSerializable not implemented correctly");
            }

            this.m_name = reader.ReadString();

            // Optional member: source artifacts.
            if (reader.ReadBoolean())
            {
                TLArtifactsCollection sources = new TLArtifactsCollection();
                sources.ReadData(reader);
                this.SourceArtifacts = sources;
            }

            // Optional member: target artifacts.
            if (reader.ReadBoolean())
            {
                TLArtifactsCollection targets = new TLArtifactsCollection();
                targets.ReadData(reader);
                this.TargetArtifacts = targets;
            }

            // Optional member: answer set.
            if (reader.ReadBoolean())
            {
                TLSimilarityMatrix answers = new TLSimilarityMatrix();
                answers.ReadData(reader);
                this.AnswerSet = answers;
            }
        }