/// <summary>
/// Runs the LSA R script (3 dimensions) on the source and target term-document
/// matrices loaded from disk and compares every produced link score against the
/// reference similarities file.
/// </summary>
public void ComputeLSA()
{
    // Load both corpora and convert them to artifact collections.
    TLArtifactsCollection sourceArtifacts =
        TermDocumentMatrix.Load(@"../../Data/LSA/source.txt").ToTLArtifactsCollection();
    TLArtifactsCollection targetArtifacts =
        TermDocumentMatrix.Load(@"../../Data/LSA/target.txt").ToTLArtifactsCollection();

    // Execute the script through the R engine configured in the test settings.
    REngine rEngine = new REngine(Settings.Default.RScriptEXE);
    LSAConfig lsaConfig = new LSAConfig { Dimensions = 3 };
    LSAScript lsaScript = new LSAScript(sourceArtifacts, targetArtifacts, lsaConfig);
    TLSimilarityMatrix computed = (TLSimilarityMatrix)rEngine.Execute(lsaScript);

    // Reference scores to compare against.
    TLSimilarityMatrix expected = Similarities.Import(@"../../Data/LSA/correct.txt");

    foreach (TLSingleLink computedLink in computed.AllLinks)
    {
        var expectedScore = expected.GetScoreForLink(
            computedLink.SourceArtifactId,
            computedLink.TargetArtifactId);

        Assert.AreEqual(expectedScore, computedLink.Score, Settings.Default.DoublePrecision);
    }
}
/// <summary>
/// Builds a <c>TermDocumentMatrix</c> from the imported target artifacts and checks it
/// against the reference matrix loaded from disk: document/term counts, document
/// names, term names, row lengths, and every cell value (exact match, delta 0.0).
/// </summary>
public void ConstructorTest_Artifacts()
{
    string data = @"../../Data/SimpleCorpus.";
    TermDocumentMatrix matrix = new TermDocumentMatrix(Artifacts.Import(data + "input.target.txt"));
    TermDocumentMatrix answer = TermDocumentMatrix.Load(data + "output.target.matrix.txt");

    // counts
    // The reference ("answer") goes first: Assert.AreEqual takes (expected, actual),
    // and the failure message labels the values accordingly.
    Assert.AreEqual(answer.NumDocs, matrix.NumDocs);
    Assert.AreEqual(answer.NumTerms, matrix.NumTerms);

    // term names
    // These do not depend on the document index, so they are checked once
    // rather than once per document.
    for (int j = 0; j < answer.NumTerms; j++)
    {
        Assert.AreEqual(answer.GetTermName(j), matrix.GetTermName(j));
    }

    // matrix
    for (int i = 0; i < answer.NumDocs; i++)
    {
        Assert.AreEqual(answer.GetDocumentName(i), matrix.GetDocumentName(i));
        Assert.AreEqual(answer.NumTerms, matrix.GetDocument(i).Length);
        for (int j = 0; j < answer.NumTerms; j++)
        {
            Assert.AreEqual(answer[i, j], matrix[i, j], 0.0);
        }
    }
}
/// <summary>
/// Builds a <c>TermDocumentMatrix</c> from the target artifacts file in the configured
/// simple-corpus directory and checks it against the reference matrix stored in the
/// "TermDocumentMatrix" subdirectory: document/term counts, document names, term
/// names, row lengths, and every cell value (exact match, delta 0.0).
/// </summary>
public void ConstructorTest_Artifacts()
{
    string inputData = Settings.Default.SimpleCorpusDir;
    string outputData = Path.Combine(inputData, "TermDocumentMatrix");
    TermDocumentMatrix matrix = new TermDocumentMatrix(Artifacts.ImportFile(Path.Combine(inputData, "target.txt")));
    TermDocumentMatrix answer = TermDocumentMatrix.Load(Path.Combine(outputData, "output.txt"));

    // counts
    // The reference ("answer") goes first: Assert.AreEqual takes (expected, actual),
    // and the failure message labels the values accordingly.
    Assert.AreEqual(answer.NumDocs, matrix.NumDocs);
    Assert.AreEqual(answer.NumTerms, matrix.NumTerms);

    // term names
    // These do not depend on the document index, so they are checked once
    // rather than once per document.
    for (int j = 0; j < answer.NumTerms; j++)
    {
        Assert.AreEqual(answer.GetTermName(j), matrix.GetTermName(j));
    }

    // matrix
    for (int i = 0; i < answer.NumDocs; i++)
    {
        Assert.AreEqual(answer.GetDocumentName(i), matrix.GetDocumentName(i));
        Assert.AreEqual(answer.NumTerms, matrix.GetDocument(i).Length);
        for (int j = 0; j < answer.NumTerms; j++)
        {
            Assert.AreEqual(answer[i, j], matrix[i, j], 0.0);
        }
    }
}
/// <summary>
/// Loads a <c>TermDocumentMatrix</c> from the absolute path of the configured
/// corpus document and stores it in the workspace under the key "Artifacts".
/// </summary>
public override void Compute()
{
    var corpusPath = _config.CorpusDocument.Absolute;
    var loadedMatrix = TermDocumentMatrix.Load(corpusPath);

    Workspace.Store("Artifacts", loadedMatrix);
}