/// <summary>
/// Prepares the LSA script run: registers the script, calls CheckParameters,
/// writes the corpus to the cache and assembles the script argument list.
/// </summary>
public override void PreCompute()
{
    RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
    CheckParameters();

    _outputFile = RUtil.ReserveCacheFile("LSA.out");

    // The corpus must be saved before the argument list is built because
    // _SourceFile and _TargetFile are read below.
    DirectoryInfo corpusDirectory = SaveArtifactsToCache(_source, _target, "LSA.corpus");

    // Argument order: corpus directory, source list, target list, output file, dimensions.
    _arguments = new List<object>
    {
        corpusDirectory.FullName,
        _SourceFile,
        _TargetFile,
        _outputFile,
        _config.Dimensions
    };
}
/// <summary>
/// Writes every source and target artifact to its own numbered file inside a
/// cache directory, and records the file indices and artifact ids in three
/// companion cache files (source index list, target index list, id map).
/// Also sets _SourceFile, _TargetFile and _mapFile to the companion file paths.
/// </summary>
/// <param name="source">Source artifacts; written first, numbered from 1.</param>
/// <param name="target">Target artifacts; numbering continues after the source artifacts.</param>
/// <param name="name">Name handed to RUtil.CreateCacheDirectory for the document directory.</param>
/// <returns>The directory that holds the numbered document files.</returns>
private DirectoryInfo SaveArtifactsToCache(TLArtifactsCollection source, TLArtifactsCollection target, string name)
{
    DirectoryInfo infoDir = RUtil.CreateCacheDirectory(name);

    // using-blocks guarantee the cache files are flushed and released even
    // when writing one of the documents fails part-way through.
    using (FileStream sFile = RUtil.CreateCacheFile("LSA.corpus.source"))
    using (FileStream tFile = RUtil.CreateCacheFile("LSA.corpus.target"))
    using (FileStream mFile = RUtil.CreateCacheFile("LSA.corpus.map"))
    using (TextWriter sourceWriter = new StreamWriter(sFile))
    using (TextWriter targetWriter = new StreamWriter(tFile))
    using (TextWriter mapWriter = new StreamWriter(mFile))
    {
        _SourceFile = sFile.Name;
        _TargetFile = tFile.Name;
        _mapFile = mFile.Name;

        // Source and target share one running index and one map file, so the
        // n-th line of the map is the id of document number n.
        int nextIndex = WriteCorpusDocuments(source, infoDir, 1, sourceWriter, mapWriter);
        WriteCorpusDocuments(target, infoDir, nextIndex, targetWriter, mapWriter);
    }

    return infoDir;
}

/// <summary>
/// Writes each artifact's text to a file named after its running index and
/// appends the index to <paramref name="indexWriter"/> and the artifact id to
/// <paramref name="mapWriter"/>.
/// </summary>
/// <returns>The first index not used, i.e. where the next batch should start.</returns>
private static int WriteCorpusDocuments(TLArtifactsCollection artifacts, DirectoryInfo directory, int firstIndex, TextWriter indexWriter, TextWriter mapWriter)
{
    int fileIndex = firstIndex;
    foreach (TLArtifact artifact in artifacts.Values)
    {
        string documentPath = Path.Combine(directory.FullName, fileIndex.ToString());
        using (TextWriter documentWriter = new StreamWriter(documentPath))
        {
            documentWriter.Write(artifact.Text);
        }

        indexWriter.WriteLine(fileIndex);
        mapWriter.WriteLine(artifact.Id);
        fileIndex++;
    }

    return fileIndex;
}
/// <summary>
/// Prepares the GibbsLDA script run: registers the script, writes the source
/// and target artifacts to the cache and assembles the script argument list.
/// </summary>
public override void PreCompute()
{
    RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

    DirectoryInfo sourceDirectory = SaveArtifactsToCache(_source, "GibbsLDA.source");
    DirectoryInfo targetDirectory = SaveArtifactsToCache(_target, "GibbsLDA.target");
    _outputFile = RUtil.ReserveCacheFile("GibbsLDA.out");

    // Argument order: source dir, target dir, output file, then model settings.
    _arguments = new List<object>
    {
        sourceDirectory.FullName,
        targetDirectory.FullName,
        _outputFile,
        _config.NumTopics,
        _config.GibbsIterations,
        _config.Alpha,
        _config.Beta,
        _config.Seed
    };
}
/// <summary>
/// Saves corpus to cache.
/// Overwrites existing files with the same name.
/// </summary>
/// <returns>
/// An <see cref="LDACorpusInfo"/> carrying the corpus name and the paths of the
/// corpus, vocabulary, edges and links files that were written.
/// </returns>
public LDACorpusInfo Save()
{
    LDACorpusInfo info = new LDACorpusInfo();
    info.Name = Name;

    // Each file is written inside using-blocks so the handle is flushed and
    // released even if a write throws.

    // write matrix
    using (FileStream cFS = RUtil.CreateCacheFile(Name + ".corpus"))
    using (TextWriter corpus = new StreamWriter(cFS))
    {
        info.Corpus = cFS.Name;
        corpus.Write(Matrix);
    }

    // write vocab
    using (FileStream vFS = RUtil.CreateCacheFile(Name + ".vocab"))
    using (TextWriter vocab = new StreamWriter(vFS))
    {
        info.Vocab = vFS.Name;
        vocab.Write(Vocab);
    }

    // write edges
    // NOTE(review): the ".tableWriter" extension looks unintended for the edges
    // file; it is kept unchanged because consumers get the path via info.Edges.
    using (FileStream eFS = RUtil.CreateCacheFile(Name + ".tableWriter"))
    using (TextWriter edges = new StreamWriter(eFS))
    {
        info.Edges = eFS.Name;
        edges.Write(Edges);
    }

    // write links
    using (FileStream lFS = RUtil.CreateCacheFile(Name + ".links"))
    using (TextWriter links = new StreamWriter(lFS))
    {
        info.Links = lFS.Name;
        links.Write(Links);
    }

    return info;
}
/// <summary>
/// Prepares the LDA script run: registers the script, builds and saves the
/// corpus from the source and target artifacts, and assembles the script
/// argument list.
/// </summary>
public override void PreCompute()
{
    RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

    LDACorpusInfo corpusInfo = new LDACorpus("LDA", _source, _target).Save();
    _outputFile = RUtil.ReserveCacheFile("LDA.out");

    // Argument order: corpus, vocab, edges, output file, then model settings.
    _arguments = new List<object>
    {
        corpusInfo.Corpus,
        corpusInfo.Vocab,
        corpusInfo.Edges,
        _outputFile,
        _config.NumTopics,
        _config.NumIterations,
        _config.Alpha,
        _config.Eta,
        _config.PredictionBeta,
        _config.Seed
    };
}
/// <summary>
/// Prepares the RTM script run: registers the script, saves the corpus held
/// in _corpus (keeping the result in _info) and assembles the script
/// argument list.
/// </summary>
public override void PreCompute()
{
    RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

    _info = _corpus.Save();
    _outputFile = RUtil.ReserveCacheFile("RTM.out");

    // Argument order: corpus, vocab, edges, links, output file, then model settings.
    _arguments = new List<object>
    {
        _info.Corpus,
        _info.Vocab,
        _info.Edges,
        _info.Links,
        _outputFile,
        _config.NumTopics,
        _config.NumIterations,
        _config.Alpha,
        _config.Eta,
        _config.RTMBeta,
        _config.PredictionBeta,
        _config.Seed
    };
}
/// <summary>
/// Creates a table of results for input into PCA.
/// The header row is "M1", "M2", ... (one label per matrix); every following
/// row is "source_target" followed by that link's score in each matrix, all
/// tab-separated. Rows follow the link order of the first matrix.
/// </summary>
/// <param name="matrices">Array of matrices (at least two, all with the same link count)</param>
/// <returns>Path of the cache file holding the table</returns>
/// <exception cref="ArgumentNullException"><paramref name="matrices"/> is null.</exception>
/// <exception cref="RDataException">
/// Fewer than two matrices were supplied, or their link counts differ.
/// </exception>
private string CreateTable(params TLSimilarityMatrix[] matrices)
{
    if (matrices == null)
    {
        throw new ArgumentNullException("matrices");
    }
    if (matrices.Length < 2)
    {
        throw new RDataException("Must have at least 2 matrices.");
    }

    // Validate everything before touching the cache so that a failure does
    // not leave a half-written table or an open file handle behind.
    for (int i = 1; i < matrices.Length; i++)
    {
        if (matrices[i].Count != matrices[0].Count)
        {
            throw new RDataException("Matrices have different count of links.");
        }
    }

    // The table is machine-read, so numbers must not depend on the current
    // culture (e.g. a decimal comma).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (FileStream tableFile = RUtil.CreateCacheFile("PCA.table"))
    using (TextWriter tableWriter = new StreamWriter(tableFile))
    {
        // Header: one column label per matrix.
        tableWriter.Write("M1");
        for (int i = 1; i < matrices.Length; i++)
        {
            tableWriter.Write(String.Format(invariant, "\tM{0}", i + 1));
        }
        tableWriter.WriteLine();

        // Body: row label plus the first matrix's score, then the score of
        // the same link in every other matrix.
        foreach (TLSingleLink link in matrices[0].AllLinks)
        {
            tableWriter.Write(String.Format(invariant, "{0}_{1}\t{2}",
                link.SourceArtifactId,
                link.TargetArtifactId,
                link.Score));
            for (int i = 1; i < matrices.Length; i++)
            {
                tableWriter.Write(String.Format(invariant, "\t{0}",
                    matrices[i].GetScoreForLink(link.SourceArtifactId, link.TargetArtifactId)));
            }
            tableWriter.WriteLine();
        }

        return tableFile.Name;
    }
}
/// <summary>
/// Writes the adjacency matrix of _pdg to a cache file, one space-separated
/// row per node, and writes the node method names (in the same row order) to
/// a companion map file whose path is stored in _mappingFile.
/// A cell holds 1 (binary weighting) or the edge weight (frequency weighting)
/// for each outgoing edge, and 0 elsewhere.
/// </summary>
/// <returns>Path of the matrix cache file.</returns>
/// <exception cref="RDataException">
/// An outgoing edge points at a node that _pdg.IndexOf cannot resolve, or
/// _config.Weight is neither Binary nor Frequency.
/// </exception>
private string GenerateAdjacencyMatrix()
{
    int n = _pdg.Nodes.Count();
    double[] rowValues = new double[n];

    // The matrix is machine-read, so numbers must not depend on the current
    // culture (e.g. a decimal comma).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // using-blocks release both files even when an RDataException is thrown
    // while a row is being built.
    using (FileStream matrixFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.matrix"))
    using (FileStream mapFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.map"))
    using (TextWriter matrixWriter = new StreamWriter(matrixFS))
    using (TextWriter mapWriter = new StreamWriter(mapFS))
    {
        _mappingFile = mapFS.Name;

        for (int nodeIndex = 0; nodeIndex < n; nodeIndex++)
        {
            PDGNode pdgNode = _pdg.GetNode(nodeIndex);

            // The row buffer is reused, so reset it for every node.
            Array.Clear(rowValues, 0, n);

            foreach (PDGEdge pdgOutgoingEdge in pdgNode.OutgoingEdges)
            {
                int column = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);
                if (column < 0)
                {
                    throw new RDataException("Invalid column index.");
                }

                if (_config.Weight == WebMiningWeightEnum.Binary)
                {
                    rowValues[column] = 1;
                }
                else if (_config.Weight == WebMiningWeightEnum.Frequency)
                {
                    rowValues[column] = pdgOutgoingEdge.Weight;
                }
                else
                {
                    throw new RDataException("Unknown weighting scheme: " + _config.Weight);
                }
            }

            string[] cells = Array.ConvertAll(rowValues, value => value.ToString(invariant));
            matrixWriter.WriteLine(String.Join(" ", cells));
            mapWriter.WriteLine(pdgNode.MethodName);
        }

        return matrixFS.Name;
    }
}
/// <summary>
/// Writes the transition matrix of _pdg to a cache file, one space-separated
/// row per node. A node without outgoing edges gets 1/n in every column;
/// otherwise a cell holds the edge weight for each outgoing edge and 0
/// elsewhere. Two companion files are written in the same row order: the
/// number of outgoing edges per node, and the node method names (its path is
/// stored in _mappingFile).
/// </summary>
/// <returns>Path of the matrix cache file.</returns>
/// <exception cref="RDataException">
/// An outgoing edge points at a node for which _pdg.IndexOf returns -1.
/// </exception>
private string GenerateTransitionProbabilityMatrix()
{
    int n = _pdg.Nodes.Count();
    double defaultValue = 1.0 / n;
    double[] rowValues = new double[n];

    // The matrix is machine-read, so numbers must not depend on the current
    // culture (e.g. a decimal comma).
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // using-blocks release all three files even when an RDataException is
    // thrown while a row is being built.
    using (FileStream matrixFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.matrix"))
    using (FileStream edgeFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.edges"))
    using (FileStream mapFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.map"))
    using (TextWriter matrixWriter = new StreamWriter(matrixFS))
    using (TextWriter edgeWriter = new StreamWriter(edgeFS))
    using (TextWriter mapWriter = new StreamWriter(mapFS))
    {
        _mappingFile = mapFS.Name;

        for (int nodeIndex = 0; nodeIndex < n; nodeIndex++)
        {
            PDGNode pdgNode = _pdg.GetNode(nodeIndex);
            int outgoingCount = pdgNode.OutgoingEdges.Count();

            // The row buffer is reused: a node with no outgoing edges gets the
            // uniform value in every column, any other node starts from zeros.
            double fill = outgoingCount == 0 ? defaultValue : 0.0;
            for (int i = 0; i < n; i++)
            {
                rowValues[i] = fill;
            }

            // write number of outgoing edges for most of the advanced PageRank algorithms
            edgeWriter.WriteLine(outgoingCount);

            foreach (PDGEdge pdgOutgoingEdge in pdgNode.OutgoingEdges)
            {
                int column = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);
                if (column == -1)
                {
                    throw new RDataException("Invalid column index.");
                }
                rowValues[column] = pdgOutgoingEdge.Weight;
            }

            string[] cells = Array.ConvertAll(rowValues, value => value.ToString(invariant));
            matrixWriter.WriteLine(String.Join(" ", cells));
            mapWriter.WriteLine(pdgNode.MethodName);
        }

        return matrixFS.Name;
    }
}