// Rebuilds the two shared fixtures: an oracle holding the three links marked
// with '*', and a ten-link candidate matrix for source "A".
public void RunBeforeEachTest()
{
    // Oracle: every starred target gets a link with score 1.
    oracle = new TLSimilarityMatrix();
    foreach (string starredTarget in new string[] { "B*", "C*", "D*" })
    {
        oracle.AddLink("A", starredTarget, 1);
    }

    /* Candidates are inserted out of score order on purpose.
     * Sorted by score they read:
     *   B*=10, E=9, F=8, C*=7, G=6, H=5, I=4, J=3, D*=2, K=1
     */
    string[] candidateTargets = { "G", "K", "B*", "E", "J", "F", "C*", "H", "D*", "I" };
    double[] candidateScores = { 6, 1, 10, 9, 3, 8, 7, 5, 2, 4 };

    sims = new TLSimilarityMatrix();
    for (int i = 0; i < candidateTargets.Length; i++)
    {
        sims.AddLink("A", candidateTargets[i], candidateScores[i]);
    }
}
/// <summary>
/// Computes the cosine similarity between the given document pairs in the matrix.
/// A link is added for every (source, target) combination; when the product of
/// the two vector lengths is zero the score is 0.0 rather than a division by zero.
/// </summary>
/// <param name="matrix">Term-by-document matrix</param>
/// <param name="sourceIDs">Collection of source artifacts ids</param>
/// <param name="targetIDs">Collection of target artifacts ids (enumerated once per source)</param>
/// <returns>Similarity matrix</returns>
public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix matrix, IEnumerable<string> sourceIDs, IEnumerable<string> targetIDs)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    foreach (string sourceID in sourceIDs)
    {
        double[] sourceDoc = matrix.GetDocument(sourceID);
        // The source vector does not change across targets, so measure it once.
        double sourceLength = ComputeLength(sourceDoc);
        foreach (string targetID in targetIDs)
        {
            double[] targetDoc = matrix.GetDocument(targetID);
            double lengthProduct = sourceLength * ComputeLength(targetDoc);
            double score = (lengthProduct == 0.0)
                ? 0.0
                : ComputeDotProduct(sourceDoc, targetDoc) / lengthProduct;
            sims.AddLink(sourceID, targetID, score);
        }
    }
    return sims;
}
/// <summary>
/// Imports a file in the form (each line):
/// SOURCE TARGET SCORE
/// Blank or whitespace-only lines are skipped.
/// </summary>
/// <param name="filename">Similarities file</param>
/// <returns>Similarity matrix</returns>
/// <exception cref="InvalidDataException">
/// A non-blank line has fewer than three whitespace-separated fields.
/// </exception>
public static TLSimilarityMatrix Import(String filename)
{
    TLSimilarityMatrix answer = new TLSimilarityMatrix();
    // 'using' closes the file on every exit path, including exceptions other than
    // the IndexOutOfRangeException handled below (e.g. a non-numeric score).
    using (StreamReader file = new StreamReader(filename))
    {
        String line;
        int num = 0; // 1-based line counter used in the error message
        while ((line = file.ReadLine()) != null)
        {
            num++;
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }
            try
            {
                String[] artifacts = line.Split();
                String source = artifacts[0];
                String target = artifacts[1];
                double score = Convert.ToDouble(artifacts[2]);
                answer.AddLink(source, target, score);
            }
            catch (IndexOutOfRangeException e)
            {
                throw new InvalidDataException("Invalid data format on line " + num + " of file:" + filename, e);
            }
        }
    }
    return answer;
}
/// <summary>
/// Imports script results: reads the output file as one flat, comma-separated
/// list of scores and maps it onto (source, target) pairs, with the target index
/// varying fastest.
/// </summary>
/// <param name="result">RScriptResults object (not read by this method)</param>
/// <returns>Script results as a similarity matrix</returns>
/// <exception cref="RDataException">
/// The number of scores differs from (source count * target count).
/// </exception>
public override object ImportResults(RScriptResult result)
{
    string rawdata;
    // 'using' guarantees the reader is released even if reading fails.
    using (TextReader rfile = new StreamReader(_outputFile))
    {
        rawdata = rfile.ReadToEnd();
    }

    // Drop the first two characters and every ')' so only the comma-separated
    // numbers remain. NOTE(review): presumably the prefix is the "c(" of an R
    // vector literal -- confirm against the script output.
    string[] sims = rawdata.Remove(0, 2).Replace(")", String.Empty).Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

    int sourceCount = _source.DocMap.Count;
    int targetCount = _target.DocMap.Count;
    if (sims.Length != sourceCount * targetCount)
    {
        throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + (sourceCount * targetCount));
    }

    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    // The size check above means targetCount is non-zero whenever this loop runs.
    for (int i = 0; i < sims.Length; i++)
    {
        int src = i / targetCount; // advances once per full pass over the targets
        int tgt = i % targetCount; // cycles through the targets
        matrix.AddLink(_source.DocMap[src], _target.DocMap[tgt], Convert.ToDouble(sims[i].Trim()));
    }
    return matrix;
}
// Round-trips a ten-link matrix through WriteData/ReadData over an in-memory
// stream, then checks the link count and that every source id survived.
public void SimilarityMatrixRawSerializationTest()
{
    string[] sources = { "source1", "source2", "source3", "source4", "source5", "source6", "source7", "source8", "source9", "source10" };
    string[] targets = { "target1", "target2", "target3", "target4", "target5", "target6", "target7", "target8", "target9", "target10" };

    // Link i pairs sources[i] with targets[i] and uses i as its score.
    TLSimilarityMatrix written = new TLSimilarityMatrix();
    int position = 0;
    foreach (string source in sources)
    {
        written.AddLink(source, targets[position], (double)position);
        position++;
    }

    // Writer and reader share one in-memory stream.
    MemoryStream buffer = new MemoryStream();
    BinaryWriter writer = new BinaryWriter(buffer);
    BinaryReader reader = new BinaryReader(buffer);

    written.WriteData(writer);
    buffer.Position = 0; // rewind so the reader starts at the first byte

    TLSimilarityMatrix restored = new TLSimilarityMatrix();
    restored.ReadData(reader);

    Assert.AreEqual(written.Count, restored.Count);

    StringHashSet expectedSources = written.SourceArtifactsIds;
    StringHashSet actualSources = restored.SourceArtifactsIds;
    foreach (string expected in expectedSources)
    {
        Assert.IsTrue(actualSources.Contains(expected));
    }
}
/// <summary>
/// Runs one search per source artifact (its text is the query) against the
/// dictionary index and records every returned result as a link scored with
/// the result's ranking.
/// </summary>
/// <param name="sourceArtifacts">Artifacts whose text is used as queries</param>
/// <param name="dict">Dictionary index that is searched</param>
/// <param name="config">Supplies the similarity metric handed to the searcher</param>
/// <returns>Similarity matrix holding one link per search result</returns>
/// <exception cref="ComponentException">sourceArtifacts or dict is null</exception>
private static TLSimilarityMatrix Process(TLArtifactsCollection sourceArtifacts, TLDictionaryIndex dict, TracerConfig config)
{
    if (sourceArtifacts == null)
    {
        throw new ComponentException("Received null sourceArtifacts");
    }
    if (dict == null)
    {
        throw new ComponentException("Received null dictionaryIndex");
    }

    TLSimilarityMatrix links = new TLSimilarityMatrix();
    Searcher engine = new Searcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

    foreach (TLArtifact artifact in sourceArtifacts.Values)
    {
        // The artifact text is the query; each hit becomes one link.
        List<Result> hits = engine.search(artifact.Text, dict);
        foreach (Result hit in hits)
        {
            links.AddLink(artifact.Id, hit.ArtifactId, hit.Ranking);
        }
    }
    return links;
}
/// <summary>
/// Computes cosine similarities between two TermDocumentMatrices.
/// Cosine similarity is defined as (dot product) / (length * length);
/// a zero length product yields a score of 0.0.
/// Links for each source document are sorted before being added to the result.
/// </summary>
/// <param name="m1">Binary document matrix</param>
/// <param name="m2">tf-idf weighted document matrix</param>
/// <returns>Similarity matrix</returns>
public static TLSimilarityMatrix ComputeCosine(TermDocumentMatrix m1, TermDocumentMatrix m2)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    List<TermDocumentMatrix> matrices = TermDocumentMatrix.Equalize(m1, m2);
    TermDocumentMatrix sourceMatrix = matrices[0];
    TermDocumentMatrix targetMatrix = matrices[1];

    // Target vector lengths are independent of the source document, so compute
    // each one once instead of once per source.
    double[] targetLengths = new double[m2.NumDocs];
    for (int j = 0; j < m2.NumDocs; j++)
    {
        targetLengths[j] = ComputeLength(targetMatrix.GetDocument(j));
    }

    for (int i = 0; i < m1.NumDocs; i++)
    {
        // Per-source values are likewise fetched once, not once per target.
        double[] sourceDoc = sourceMatrix.GetDocument(i);
        double sourceLength = ComputeLength(sourceDoc);
        string sourceName = m1.GetDocumentName(i);

        TLLinksList links = new TLLinksList();
        for (int j = 0; j < m2.NumDocs; j++)
        {
            double lengthProduct = sourceLength * targetLengths[j];
            double score = (lengthProduct == 0.0)
                ? 0.0
                : ComputeDotProduct(sourceDoc, targetMatrix.GetDocument(j)) / lengthProduct;
            links.Add(new TLSingleLink(sourceName, m2.GetDocumentName(j), score));
        }

        links.Sort();
        foreach (TLSingleLink link in links)
        {
            sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
    }
    return sims;
}
/// <summary>
/// Import script results from a whitespace-separated matrix file.
/// The first line lists the source ids; every following non-blank line starts
/// with a target id followed by one score per source. Ids in the file are
/// 1-based positions into the map file.
/// </summary>
/// <param name="result">RScriptResults object (not read by this method)</param>
/// <returns>Script results as a similarity matrix</returns>
/// <exception cref="InvalidDataException">The output file has no header line.</exception>
public override object ImportResults(RScriptResult result)
{
    // index = id - 1
    string[] ids = Generics.ImportStrings(_mapFile);
    TLSimilarityMatrix matrix = new TLSimilarityMatrix();

    // 'using' closes the reader even when a line fails to parse.
    using (TextReader resultsMatrix = new StreamReader(_outputFile))
    {
        string header = resultsMatrix.ReadLine();
        if (header == null)
        {
            // An empty file previously surfaced as a NullReferenceException.
            throw new InvalidDataException("Results file is empty: " + _outputFile);
        }
        string[] sources = header.Split();

        string line;
        while ((line = resultsMatrix.ReadLine()) != null)
        {
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }
            // [0] target id, [x+] source sims index = x - 1
            string[] entries = line.Split();
            string entry = ids[Convert.ToInt32(entries[0]) - 1];
            for (int i = 0; i < sources.Length; i++)
            {
                matrix.AddLink(ids[Convert.ToInt32(sources[i]) - 1], entry, Convert.ToDouble(entries[i + 1]));
            }
        }
    }
    return matrix;
}
/// <summary>
/// Builds a similarity matrix by adding every link of the list, in list order.
/// </summary>
/// <param name="list">Links to copy</param>
/// <returns>Similarity matrix populated from the list</returns>
public static TLSimilarityMatrix CreateMatrix(TLLinksList list)
{
    TLSimilarityMatrix result = new TLSimilarityMatrix();
    for (int i = 0; i < list.Count; i++)
    {
        TLSingleLink current = list[i];
        result.AddLink(current.SourceArtifactId, current.TargetArtifactId, current.Score);
    }
    return result;
}
// this takes a long time
/// <summary>
/// Copies into targetMatrix every link of sourceMatrix whose source is the given
/// feature and whose target, with any parenthesised part removed, is a key of
/// executedMethods.
/// </summary>
/// <param name="sourceMatrix">Matrix whose links are examined (not modified here)</param>
/// <param name="targetMatrix">Matrix that receives the matching links</param>
/// <param name="feature">Source artifact id to keep</param>
/// <param name="executedMethods">Lookup of executed method names (only the keys are used)</param>
public static void RemoveNonExecutedMethods(ref TLSimilarityMatrix sourceMatrix, ref TLSimilarityMatrix targetMatrix, String feature, Dictionary<string, int> executedMethods)
{
    foreach (TLSingleLink candidate in sourceMatrix.AllLinks)
    {
        if (candidate.SourceArtifactId != feature)
        {
            continue;
        }

        // Remove the "(...)" portion so the id can be matched against the keys.
        string methodName = Regex.Replace(candidate.TargetArtifactId, "(\\(.*\\))", "");
        if (!executedMethods.ContainsKey(methodName))
        {
            continue;
        }

        targetMatrix.AddLink(candidate.SourceArtifactId, candidate.TargetArtifactId, candidate.Score);
    }
}
/// <summary>
/// Loads "Matrix1" and "Matrix2" from the workspace, adds every link of the
/// second to the first, and stores the result under "Merged".
/// </summary>
public override void Compute()
{
    TLSimilarityMatrix merged = (TLSimilarityMatrix)Workspace.Load("Matrix1");
    TLSimilarityMatrix incoming = (TLSimilarityMatrix)Workspace.Load("Matrix2");

    // The first matrix doubles as the accumulator for the merge.
    foreach (TLSingleLink extra in incoming.AllLinks)
    {
        merged.AddLink(extra.SourceArtifactId, extra.TargetArtifactId, extra.Score);
    }

    Workspace.Store("Merged", merged);
}
/// <summary>
/// Normalizes a similarity matrix by replacing each score with
/// (score - mean) / standard deviation, using the matrix-wide mean and
/// standard deviation. When the standard deviation is zero or NaN, every
/// normalized score is 0.0 instead of NaN/Infinity.
/// </summary>
/// <param name="matrix">Similarity matrix</param>
/// <returns>Normalized similarity matrix</returns>
public static TLSimilarityMatrix Normalize(TLSimilarityMatrix matrix)
{
    TLSimilarityMatrix norm = new TLSimilarityMatrix();
    double mean = TLSimilarityMatrixUtil.AverageSimilarity(matrix);
    double stdDev = TLSimilarityMatrixUtil.SimilarityStandardDeviation(matrix);

    // With no spread there is nothing to scale by; dividing would give NaN or
    // +/-Infinity, so such matrices are normalized to 0.0 across the board.
    bool noSpread = stdDev == 0.0 || Double.IsNaN(stdDev);

    foreach (TLSingleLink link in matrix.AllLinks)
    {
        double normalized = noSpread ? 0.0 : (link.Score - mean) / stdDev;
        norm.AddLink(link.SourceArtifactId, link.TargetArtifactId, normalized);
    }
    return norm;
}
/// <summary>
/// For every link (source, target) of the matrix, looks up the artifacts that
/// <paramref name="relationships"/> reports for that target and increases the
/// stored score of (source, related artifact) by a fraction of itself: the
/// computed delta when UseDelta is defined, otherwise 0.1.
/// The updated scores are returned as a new matrix built from the sorted links.
/// </summary>
/// <param name="matrix">Similarity matrix to adjust</param>
/// <param name="relationships">Matrix queried for artifacts related to each target</param>
/// <returns>New similarity matrix with the adjusted scores</returns>
/// <remarks>
/// The dictionary lookups throw if the matrix contains the same (source, target)
/// pair twice, or if a related artifact has no link from the same source.
/// </remarks>
public static TLSimilarityMatrix Compute(TLSimilarityMatrix matrix, TLSimilarityMatrix relationships)
{
    // Lookup table: source id -> (target id -> current score).
    Dictionary<string, Dictionary<string, double>> scores = new Dictionary<string, Dictionary<string, double>>();
    foreach (TLSingleLink entry in matrix.AllLinks)
    {
        Dictionary<string, double> row;
        if (!scores.TryGetValue(entry.SourceArtifactId, out row))
        {
            row = new Dictionary<string, double>();
            scores.Add(entry.SourceArtifactId, row);
        }
        row.Add(entry.TargetArtifactId, entry.Score);
    }

    // Fraction of its own value that a score gains per related hit.
#if UseDelta
    double bonus = SharedUtils.ComputeDelta(matrix);
#else
    double bonus = 0.1;
#endif

    // Walk every (source, target) pair in sorted order.
    TLLinksList ordered = matrix.AllLinks;
    ordered.Sort();
    foreach (TLSingleLink entry in ordered)
    {
        foreach (string related in relationships.GetSetOfTargetArtifactIdsAboveThresholdForSourceArtifact(entry.TargetArtifactId))
        {
            Dictionary<string, double> row = scores[entry.SourceArtifactId];
            row[related] += row[related] * bonus;
        }
    }

    // Flatten the table back into a sorted list, then into the result matrix.
    TLLinksList rebuilt = new TLLinksList();
    foreach (KeyValuePair<string, Dictionary<string, double>> bySource in scores)
    {
        foreach (KeyValuePair<string, double> byTarget in bySource.Value)
        {
            rebuilt.Add(new TLSingleLink(bySource.Key, byTarget.Key, byTarget.Value));
        }
    }
    rebuilt.Sort();

    TLSimilarityMatrix adjusted = new TLSimilarityMatrix();
    foreach (TLSingleLink entry in rebuilt)
    {
        adjusted.AddLink(entry.SourceArtifactId, entry.TargetArtifactId, entry.Score);
    }
    return adjusted;
}
/// <summary>
/// Replaces <paramref name="sims"/> with a new matrix that keeps only the links
/// whose source id (parsed as an integer) maps, through qmap, to the type string
/// of the requested feature set.
/// </summary>
/// <param name="sims">Matrix to filter; reassigned to the filtered matrix</param>
/// <param name="set">Feature set whose links are kept</param>
/// <param name="qmap">Maps numeric source ids to feature-set type strings</param>
private static void RemoveNonFeature(ref TLSimilarityMatrix sims, FeatureSet set, Dictionary<int, string> qmap)
{
    TLSimilarityMatrix kept = new TLSimilarityMatrix();
    string wantedType = GetFeatureSetType(set);

    foreach (TLSingleLink candidate in sims.AllLinks)
    {
        int queryId = Convert.ToInt32(candidate.SourceArtifactId);
        if (qmap[queryId] != wantedType)
        {
            continue;
        }
        kept.AddLink(candidate.SourceArtifactId, candidate.TargetArtifactId, candidate.Score);
    }

    sims = kept;
}
/// <summary>
/// Performs an affine transformation on two similarity matrices: both are
/// normalized, then for every link of the normalized large matrix its score is
/// combined with the score the normalized small matrix reports for the same pair.
/// </summary>
/// <param name="large">Large expert</param>
/// <param name="small">Small expert</param>
/// <param name="lambda">Weight given to large expert</param>
/// <returns>Transformed similarities</returns>
public static TLSimilarityMatrix Transform(TLSimilarityMatrix large, TLSimilarityMatrix small, double lambda)
{
    TLSimilarityMatrix normalizedLarge = Normalize(large);
    TLSimilarityMatrix normalizedSmall = Normalize(small);

    TLSimilarityMatrix result = new TLSimilarityMatrix();
    foreach (TLSingleLink link in normalizedLarge.AllLinks)
    {
        string source = link.SourceArtifactId;
        string target = link.TargetArtifactId;

        // The large matrix drives the iteration; the small one is only looked up.
        double smallScore = normalizedSmall.GetScoreForLink(source, target);
        result.AddLink(source, target, Combine(link.Score, smallScore, lambda));
    }
    return result;
}
/// <summary>
/// Computes the traceability between source and target artifacts using dictionary and American Corpus Term weights.
/// Each source artifact's text is run as a query; every returned result becomes
/// a link scored with the result's ranking.
/// </summary>
/// <param name="sourceArtifacts">The source artifacts.</param>
/// <param name="targetArtifacts">The target artifacts (only checked for null here).</param>
/// <param name="dict">The dict.</param>
/// <param name="ancTermsWeights">The anc terms weights.</param>
/// <param name="config">The config; supplies the similarity metric.</param>
/// <returns>Similarity matrix with links between source and target artifacts</returns>
/// <exception cref="ComponentException">Any of the first four arguments is null.</exception>
private static TLSimilarityMatrix ComputeTraceability(TLArtifactsCollection sourceArtifacts, TLArtifactsCollection targetArtifacts, TLDictionaryIndex dict, TLKeyValuePairsList ancTermsWeights, TracerConfig config)
{
    if (sourceArtifacts == null)
    {
        throw new ComponentException("Received source artifacts are null!");
    }
    if (targetArtifacts == null)
    {
        throw new ComponentException("Received target artifacts are null!");
    }
    if (dict == null)
    {
        throw new ComponentException("Received dictionary index is null!");
    }
    if (ancTermsWeights == null)
    {
        throw new ComponentException("Received 'ancTermsWeights' is null!");
    }

    TLSimilarityMatrix similarityMatrix = new TLSimilarityMatrix();
    ANCSearcher searcher = new ANCSearcher(SimilarityMetricFactory.GetSimiliarityMetric(config.SimilarityMetric));

    // The prepared weights depend only on ancTermsWeights, so build them once
    // rather than once per source artifact.
    // NOTE(review): assumes searcher.search does not modify the prepared data -- confirm.
    var ancData = PrepareANCData(ancTermsWeights);

    // Iterates over all the source artifacts to determine the probabilities to target artifacts - by executing a search
    foreach (TLArtifact sourceArtifact in sourceArtifacts.Values)
    {
        String query = sourceArtifact.Text;

        // Executes the query
        List<Result> results = searcher.search(query, dict, ancData);

        // Iterates over the results and stores them in the matrix
        foreach (Result r in results)
        {
            similarityMatrix.AddLink(sourceArtifact.Id, r.ArtifactId, r.Ranking);
        }
    }
    return similarityMatrix;
}
/// <summary>
/// Builds an oracle matrix for the single source "trace": entry i of the map
/// file becomes the target id and entry i of the rank file its score.
/// Asserts that both files hold the same number of entries.
/// </summary>
/// <param name="rankFile">File of scores, one per target</param>
/// <param name="mapFile">File of target ids, in the same order as the scores</param>
/// <returns>Oracle similarity matrix</returns>
internal static TLSimilarityMatrix GenerateOracle(string rankFile, string mapFile)
{
    Console.WriteLine("Generating oracle...");

    // Materialize both sequences once: indexing a list avoids re-enumerating the
    // imported sequences on every Count()/ElementAt() call.
    List<double> ranks = new List<double>(Generics.ImportDoubles(rankFile, false));
    List<string> map = new List<string>(Generics.ImportStrings(mapFile));
    Assert.AreEqual(map.Count, ranks.Count);

    TLSimilarityMatrix oracle = new TLSimilarityMatrix();
    for (int i = 0; i < map.Count; i++)
    {
        oracle.AddLink("trace", map[i], ranks[i]);
    }
    return oracle;
}
/// <summary>
/// Import script results: pairs entry i of the mapping file (target id) with
/// entry i of the output file (score) and links it to the configured trace id.
/// </summary>
/// <param name="result">RScriptResults object (not read by this method)</param>
/// <returns>Script results as a similarity matrix</returns>
/// <exception cref="RDataException">The two files hold a different number of entries.</exception>
public override object ImportResults(RScriptResult result)
{
    // Materialize both sequences once: indexing a list avoids re-enumerating the
    // imported sequences on every Count()/ElementAt() call.
    List<double> ranks = new List<double>(Generics.ImportDoubles(_outputFile, false));
    List<string> map = new List<string>(Generics.ImportStrings(_mappingFile));
    if (ranks.Count != map.Count)
    {
        throw new RDataException("Results file in incorrect format: incorrect number of entries");
    }

    TLSimilarityMatrix rankList = new TLSimilarityMatrix();
    for (int i = 0; i < map.Count; i++)
    {
        rankList.AddLink(_traceID, map[i], ranks[i]);
    }
    return rankList;
}
/// <summary>
/// FORMAT
/// ======
/// Line 1 - "","UC","CC","Similarity","Oracle","Precision","Recall","feedback"
/// Line 2+ - values
/// The header line is discarded; blank lines are skipped. For each data line,
/// fields 1, 2 and 3 (after splitting on commas and double quotes) become the
/// source id, target id and score.
/// </summary>
/// <param name="path">Path of the CSV file to read</param>
/// <returns>Similarity matrix whose threshold is set to Double.MinValue</returns>
public static TLSimilarityMatrix Import(string path)
{
    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    matrix.Threshold = Double.MinValue;

    // 'using' releases the file handle, which was previously never closed.
    using (TextReader file = new StreamReader(path))
    {
        file.ReadLine(); // discard the header row
        string line;
        while ((line = file.ReadLine()) != null)
        {
            // A trailing empty line would otherwise fail on item[1].
            if (String.IsNullOrWhiteSpace(line))
            {
                continue;
            }
            string[] item = line.Split(new char[] { ',', '"' }, StringSplitOptions.RemoveEmptyEntries);
            matrix.AddLink(item[1], item[2], Convert.ToDouble(item[3]));
        }
    }
    return matrix;
}
/// <summary>
/// Repeatedly takes the first link of the sorted list and moves it into the
/// result. When the feedback matrix reports that link above threshold, it is
/// counted as correct and every remaining link from the same source whose target
/// is related to the confirmed target (per <paramref name="relationships"/>) has
/// its score raised by a fraction of itself: the computed delta when UseDelta is
/// defined, otherwise 0.1. The list is re-sorted after every step.
/// </summary>
/// <param name="sims">Similarities to process; the list returned by AllLinks is sorted and consumed</param>
/// <param name="relationships">Matrix queried for target-to-target relations</param>
/// <param name="feedback">Matrix queried to confirm links; its Count bounds the loop</param>
/// <returns>
/// Matrix of the links that were taken before the loop ended. The loop stops when
/// the list is empty or the number of confirmed links reaches feedback.Count, so
/// links still waiting at that point are not included.
/// </returns>
public static TLSimilarityMatrix Compute(TLSimilarityMatrix sims, TLSimilarityMatrix relationships, TLSimilarityMatrix feedback)
{
    TLSimilarityMatrix processed = new TLSimilarityMatrix();

    // Fraction of its own value that a related link gains per confirmation.
#if UseDelta
    double bonus = SharedUtils.ComputeDelta(sims);
#else
    double bonus = 0.1;
#endif

    // Start from a fully sorted list.
    TLLinksList pending = sims.AllLinks;
    pending.Sort();

    int confirmed = 0;
    while (pending.Count > 0 && confirmed < feedback.Count)
    {
        TLSingleLink top = pending[0];

        if (feedback.IsLinkAboveThreshold(top.SourceArtifactId, top.TargetArtifactId))
        {
            confirmed++;

            // Boost the remaining links of the same source with a related target.
            for (int i = 1; i < pending.Count; i++)
            {
                TLSingleLink other = pending[i];
                if (!top.SourceArtifactId.Equals(other.SourceArtifactId))
                {
                    continue;
                }
                if (!relationships.IsLinkAboveThreshold(top.TargetArtifactId, other.TargetArtifactId))
                {
                    continue;
                }
                other.Score += other.Score * bonus;
            }
        }

        // Move the top link into the result, then restore the ordering because
        // the boosts above may have changed it.
        processed.AddLink(top.SourceArtifactId, top.TargetArtifactId, top.Score);
        pending.RemoveAt(0);
        pending.Sort();
    }
    return processed;
}
/// <summary>
/// Imports every file of a directory as a set of links: the feature id is
/// extracted from the file name and each line of the file becomes a target
/// linked to that feature with score 1.
/// </summary>
/// <param name="directory">Directory whose files are read</param>
/// <returns>Similarity matrix with one link per line read</returns>
public static TLSimilarityMatrix Import(string directory)
{
    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    foreach (String file in Directory.GetFiles(directory))
    {
        String feature = Similarities.ExtractFeatureID(file);
        // 'using' closes the file even if reading or AddLink throws.
        using (StreamReader links = new StreamReader(file))
        {
            String link;
            while ((link = links.ReadLine()) != null)
            {
                matrix.AddLink(feature, link, 1);
            }
        }
    }
    return matrix;
}
/// <summary>
/// Extracts links containing the given artifact IDs from a similarity matrix.
/// A link is kept when its source or its target id is in the collection.
/// </summary>
/// <param name="original">Original matrix</param>
/// <param name="artifactIDs">List of artifact IDs</param>
/// <param name="ignoreParameters">
/// Flag to ignore parameter overloads and compare only method names: ids are
/// compared up to their first '(' when that character is not at position 0.
/// </param>
/// <returns>Extracted links (with their original, untruncated ids)</returns>
public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs, bool ignoreParameters)
{
    // Build the lookup once; Enumerable.Contains would rescan the whole sequence
    // twice for every link.
    HashSet<string> wanted = new HashSet<string>(artifactIDs);

    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    foreach (TLSingleLink link in original)
    {
        string sourceID = link.SourceArtifactId;
        string targetID = link.TargetArtifactId;
        if (ignoreParameters)
        {
            // Truncate only when '(' is found past the first character.
            int sourceParen = sourceID.IndexOf('(');
            if (sourceParen > 0)
            {
                sourceID = sourceID.Substring(0, sourceParen);
            }
            int targetParen = targetID.IndexOf('(');
            if (targetParen > 0)
            {
                targetID = targetID.Substring(0, targetParen);
            }
        }

        if (wanted.Contains(sourceID) || wanted.Contains(targetID))
        {
            matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
    }
    return matrix.AllLinks;
}
/// <summary>
/// Distributes the links of <paramref name="original"/> over three matrices
/// according to the feature-set type that qmap assigns to each link's source id
/// (parsed as an integer). Links whose type matches none of the three are dropped.
/// </summary>
/// <param name="original">Matrix whose links are distributed (not modified here)</param>
/// <param name="qmap">Maps numeric source ids to feature-set type strings</param>
/// <param name="bugs">Receives links whose type is that of FeatureSet.Bugs</param>
/// <param name="features">Receives links whose type is that of FeatureSet.Features</param>
/// <param name="patch">Receives links whose type is that of FeatureSet.Patch</param>
public static void Split(ref TLSimilarityMatrix original, Dictionary<int, string> qmap, ref TLSimilarityMatrix bugs, ref TLSimilarityMatrix features, ref TLSimilarityMatrix patch)
{
    // The type strings do not depend on the link, so resolve them once.
    string bugsType = Trace.GetFeatureSetType(FeatureSet.Bugs);
    string featuresType = Trace.GetFeatureSetType(FeatureSet.Features);
    string patchType = Trace.GetFeatureSetType(FeatureSet.Patch);

    foreach (TLSingleLink link in original.AllLinks)
    {
        string feature = qmap[Convert.ToInt32(link.SourceArtifactId)];
        if (feature == bugsType)
        {
            bugs.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
        else if (feature == featuresType)
        {
            features.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
        else if (feature == patchType)
        {
            patch.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
    }
}
/// <summary>
/// Imports every file of a directory as similarities for one feature. The feature
/// id is extracted from the file name. Each line is split on single spaces:
/// field 0 is a 1-based index into <paramref name="map"/> (giving the target id)
/// and field 2 is the score.
/// </summary>
/// <param name="directory">Directory whose files are read</param>
/// <param name="map">Target ids, addressed by (index in file - 1)</param>
/// <returns>Similarity matrix with one link per line read</returns>
public static TLSimilarityMatrix Import(String directory, List<String> map)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    foreach (String file in Directory.GetFiles(directory))
    {
        String feature = ExtractFeatureID(file);
        // 'using' closes the file even when a line fails to parse.
        using (StreamReader idFile = new StreamReader(file))
        {
            String line;
            while ((line = idFile.ReadLine()) != null)
            {
                String[] vars = line.Split(' ');
                sims.AddLink(feature, map[Convert.ToInt32(vars[0]) - 1], Convert.ToDouble(vars[2]));
            }
        }
    }
    return sims;
}
/// <summary>
/// Imports an oracle from a directory of files.
/// Each file is a source artifact containing targets on each line.
/// The file name is the source id; blank lines are skipped and every other
/// line becomes a link with score 1.
/// </summary>
/// <param name="directory">Directory whose files are read</param>
/// <returns>Oracle similarity matrix</returns>
public static TLSimilarityMatrix ImportDirectory(string directory)
{
    TLSimilarityMatrix oracle = new TLSimilarityMatrix();
    foreach (string file in Directory.GetFiles(directory))
    {
        string id = Path.GetFileName(file);
        // 'using' releases each file handle, which was previously never closed.
        using (TextReader fReader = new StreamReader(file))
        {
            string line;
            while ((line = fReader.ReadLine()) != null)
            {
                if (String.IsNullOrWhiteSpace(line))
                {
                    continue;
                }
                oracle.AddLink(id, line, 1);
            }
        }
    }
    return oracle;
}
/// <summary>
/// Imports an answer set from file in the form (each line):
/// SOURCE TARGET1 TARGET2 ...
/// Fields are separated by whitespace; empty target fields are ignored.
/// </summary>
/// <param name="filename">File location</param>
/// <returns>Similarity matrix (link score 1)</returns>
public static TLSimilarityMatrix Import(String filename)
{
    TLSimilarityMatrix answer = new TLSimilarityMatrix();
    // 'using' releases the file handle, which was previously never closed.
    using (StreamReader file = new StreamReader(filename))
    {
        String line;
        while ((line = file.ReadLine()) != null)
        {
            String[] artifacts = line.Split();
            String source = artifacts[0];
            for (int i = 1; i < artifacts.Length; i++)
            {
                String target = artifacts[i].Trim();
                if (target != "")
                {
                    answer.AddLink(source, target, 1);
                }
            }
        }
    }
    return answer;
}
/// <summary>
/// Computes Jensen-Shannon divergence on two TermDocumentMatrices.
/// The matrices are equalized first; every source document is then scored against
/// every target document, and each source's links are sorted before being added.
/// </summary>
/// <param name="source">Source artifacts collection</param>
/// <param name="target">Target artifacts collection</param>
/// <returns>Similarity matrix</returns>
public static TLSimilarityMatrix Compute(TermDocumentMatrix source, TermDocumentMatrix target)
{
    List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(source, target);
    TermDocumentMatrix sourceDocs = equalized[0];
    TermDocumentMatrix targetDocs = equalized[1];

    TLSimilarityMatrix result = new TLSimilarityMatrix();
    for (int s = 0; s < sourceDocs.NumDocs; s++)
    {
        // Gather this source's links, then sort them before adding.
        TLLinksList row = new TLLinksList();
        for (int t = 0; t < targetDocs.NumDocs; t++)
        {
            string sourceName = sourceDocs.GetDocumentName(s);
            string targetName = targetDocs.GetDocumentName(t);
            row.Add(new TLSingleLink(sourceName, targetName, DocumentSimilarity(sourceDocs.GetDocument(s), targetDocs.GetDocument(t))));
        }
        row.Sort();

        foreach (TLSingleLink scored in row)
        {
            result.AddLink(scored.SourceArtifactId, scored.TargetArtifactId, scored.Score);
        }
    }
    return result;
}
/// <summary>
/// Imports every file of a directory as a set of links.
/// Each file is a link: filename.map - feature.
/// Each line in a file is a methodID, linked to that feature with score 1.
/// </summary>
/// <param name="directory">Directory whose files are read</param>
/// <returns>Similarity matrix with one link per line read</returns>
public static TLSimilarityMatrix Import(String directory)
{
    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    foreach (String file in Directory.GetFiles(directory))
    {
        String feature = ExtractFeatureID(file);
        // 'using' releases each file handle, which was previously never closed.
        using (StreamReader links = new StreamReader(file))
        {
            String link;
            while ((link = links.ReadLine()) != null)
            {
                matrix.AddLink(feature, link, 1);
            }
        }
    }
    return matrix;
}
/// <summary>
/// Scores every query against every document as
/// product(query, document) / (query length * document length).
/// A zero denominator yields a score of 0.0 instead of NaN/Infinity.
/// </summary>
/// <param name="docs">Document vectors, keyed by document id</param>
/// <param name="lengths">Document lengths, looked up by document id</param>
/// <param name="queries">Query vectors, keyed by query id</param>
/// <returns>Similarity matrix with one link per (query, document) pair</returns>
public static TLSimilarityMatrix Compute(NormalizedVectorCollection docs, NormalizedVector lengths, DocumentVectorCollection queries)
{
    TLSimilarityMatrix sims = new TLSimilarityMatrix();
    foreach (KeyValuePair<string, DocumentVector> QueryKVP in queries)
    {
        /*
         * Since tf in queries are all 1,
         * we can assume this term is the sqrt of the size of the dictionary
         */
        double qVal = Math.Sqrt(QueryKVP.Value.Count);
        foreach (KeyValuePair<string, NormalizedVector> DocKVP in docs)
        {
            double dVal = lengths[DocKVP.Key];
            double qdVec = ComputeProduct(QueryKVP.Value, DocKVP.Value);

            // An empty query or a zero-length document makes the denominator zero;
            // score such pairs 0.0 rather than dividing.
            double denominator = qVal * dVal;
            double score = (denominator == 0.0) ? 0.0 : qdVec / denominator;
            sims.AddLink(QueryKVP.Key, DocKVP.Key, score);
        }
    }
    return sims;
}
/// <summary>
/// Computes cosine similarities between a set of boolean document vectors and a tfidf weighted corpus.
/// Both matrices are equalized first. A pair whose norm product is zero scores 0.0.
/// Each source document's links are sorted before being added to the result.
/// </summary>
/// <param name="ids">Boolean document vectors</param>
/// <param name="tfidf">tf-idf weighted document vectors</param>
/// <returns>Similarity matrix</returns>
private static TLSimilarityMatrix ComputeSimilarities(TermDocumentMatrix ids, TermDocumentMatrix tfidf)
{
    TLSimilarityMatrix result = new TLSimilarityMatrix();
    List<TermDocumentMatrix> equalized = TermDocumentMatrix.Equalize(ids, tfidf);
    TermDocumentMatrix left = equalized[0];
    TermDocumentMatrix right = equalized[1];

    for (int row = 0; row < ids.NumDocs; row++)
    {
        TLLinksList ranked = new TLLinksList();
        for (int col = 0; col < tfidf.NumDocs; col++)
        {
            // One pass accumulates the dot product and both squared norms.
            double dot = 0.0;
            double leftSquares = 0.0;
            double rightSquares = 0.0;
            for (int term = 0; term < left.NumTerms; term++)
            {
                double x = left[row, term];
                double y = right[col, term];
                dot += (x * y);
                leftSquares += Math.Pow(x, 2);
                rightSquares += Math.Pow(y, 2);
            }

            double normProduct = Math.Sqrt(leftSquares) * Math.Sqrt(rightSquares);
            double score = (normProduct == 0.0) ? 0.0 : dot / normProduct;
            ranked.Add(new TLSingleLink(ids.GetDocumentName(row), tfidf.GetDocumentName(col), score));
        }

        ranked.Sort();
        foreach (TLSingleLink scored in ranked)
        {
            result.AddLink(scored.SourceArtifactId, scored.TargetArtifactId, scored.Score);
        }
    }
    return result;
}