示例#1
0
        /// <summary>
        /// Prepares the LSA run: registers the base script, calls CheckParameters(),
        /// reserves the output file, writes the corpus to the cache and collects the
        /// argument list for the script.
        /// </summary>
        public override void PreCompute()
        {
            RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
            CheckParameters();
            _outputFile = RUtil.ReserveCacheFile("LSA.out");

            // The corpus must be written before the argument list is built,
            // because _SourceFile and _TargetFile are read below.
            DirectoryInfo corpusDirectory = SaveArtifactsToCache(_source, _target, "LSA.corpus");

            // Argument order: corpus directory, source index, target index, output file, dimensions.
            _arguments = new List <object>
            {
                corpusDirectory.FullName,
                _SourceFile,
                _TargetFile,
                _outputFile,
                _config.Dimensions
            };
        }
示例#2
0
        /// <summary>
        /// Writes every source and target artifact to its own numbered file inside a
        /// cache directory and records the numbering in three index files.
        /// </summary>
        /// <remarks>
        /// Files are numbered consecutively starting at 1: source artifacts first, then
        /// target artifacts. The source and target index files list the numbers that belong
        /// to each set; the map file lists the artifact id for every number, in order.
        /// Sets _SourceFile, _TargetFile and _mapFile to the paths of the index files.
        /// </remarks>
        /// <param name="source">Source artifacts</param>
        /// <param name="target">Target artifacts</param>
        /// <param name="name">Name of the cache directory that receives the artifact files</param>
        /// <returns>The cache directory containing the numbered artifact files</returns>
        private DirectoryInfo SaveArtifactsToCache(TLArtifactsCollection source, TLArtifactsCollection target, string name)
        {
            DirectoryInfo infoDir = RUtil.CreateCacheDirectory(name);

            // using blocks guarantee that every handle is flushed and released even
            // when writing one of the artifacts fails part-way through.
            using (FileStream sFile = RUtil.CreateCacheFile("LSA.corpus.source"))
            using (TextWriter sourceWriter = new StreamWriter(sFile))
            using (FileStream tFile = RUtil.CreateCacheFile("LSA.corpus.target"))
            using (TextWriter targetWriter = new StreamWriter(tFile))
            using (FileStream mFile = RUtil.CreateCacheFile("LSA.corpus.map"))
            using (TextWriter mapWriter = new StreamWriter(mFile))
            {
                _SourceFile = sFile.Name;
                _TargetFile = tFile.Name;
                _mapFile    = mFile.Name;

                // Target numbering continues where the source numbering stopped.
                int nextIndex = WriteArtifactFiles(source, infoDir, 1, sourceWriter, mapWriter);
                WriteArtifactFiles(target, infoDir, nextIndex, targetWriter, mapWriter);
            }

            return(infoDir);
        }

        /// <summary>
        /// Writes the text of each artifact to a file named after its running index and
        /// appends the index to <paramref name="indexWriter"/> and the artifact id to
        /// <paramref name="mapWriter"/>.
        /// </summary>
        /// <param name="artifacts">Artifacts to write</param>
        /// <param name="directory">Directory that receives the numbered files</param>
        /// <param name="firstIndex">Number given to the first artifact</param>
        /// <param name="indexWriter">Receives one line per artifact with its file number</param>
        /// <param name="mapWriter">Receives one line per artifact with its id</param>
        /// <returns>The first index that was not used</returns>
        private static int WriteArtifactFiles(TLArtifactsCollection artifacts, DirectoryInfo directory, int firstIndex, TextWriter indexWriter, TextWriter mapWriter)
        {
            int fileIndex = firstIndex;

            foreach (TLArtifact artifact in artifacts.Values)
            {
                string artifactPath = Path.Combine(directory.FullName, fileIndex.ToString());
                using (TextWriter tw = new StreamWriter(artifactPath))
                {
                    tw.Write(artifact.Text);
                }
                indexWriter.WriteLine(fileIndex);
                mapWriter.WriteLine(artifact.Id);
                fileIndex++;
            }

            return(fileIndex);
        }
示例#3
0
        /// <summary>
        /// Prepares the GibbsLDA run: registers the base script, writes the source and
        /// target artifacts to the cache, reserves the output file and collects the
        /// argument list for the script.
        /// </summary>
        public override void PreCompute()
        {
            RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

            DirectoryInfo sourceDirectory = SaveArtifactsToCache(_source, "GibbsLDA.source");
            DirectoryInfo targetDirectory = SaveArtifactsToCache(_target, "GibbsLDA.target");
            _outputFile = RUtil.ReserveCacheFile("GibbsLDA.out");

            // Argument order: source dir, target dir, output file, then the model settings.
            _arguments = new List <object>
            {
                sourceDirectory.FullName,
                targetDirectory.FullName,
                _outputFile,
                _config.NumTopics,
                _config.GibbsIterations,
                _config.Alpha,
                _config.Beta,
                _config.Seed
            };
        }
示例#4
0
        /// <summary>
        /// Saves the corpus to the cache as four files: matrix (".corpus"), vocabulary
        /// (".vocab"), edges and links (".links"), each named after <c>Name</c>.
        /// Existing files with the same name are expected to be overwritten; that depends
        /// on RUtil.CreateCacheFile, which is not visible here.
        /// </summary>
        /// <returns>
        /// An <c>LDACorpusInfo</c> carrying the corpus name and the paths of the four
        /// files that were written.
        /// </returns>
        public LDACorpusInfo Save()
        {
            LDACorpusInfo info = new LDACorpusInfo();

            info.Name = Name;

            // Each file is written inside using blocks so the handle is flushed and
            // released even if the write throws.

            // write matrix
            using (FileStream cFS = RUtil.CreateCacheFile(Name + ".corpus"))
            using (TextWriter corpus = new StreamWriter(cFS))
            {
                info.Corpus = cFS.Name;
                corpus.Write(Matrix);
            }

            // write vocab
            using (FileStream vFS = RUtil.CreateCacheFile(Name + ".vocab"))
            using (TextWriter vocab = new StreamWriter(vFS))
            {
                info.Vocab = vFS.Name;
                vocab.Write(Vocab);
            }

            // write edges
            // NOTE(review): the ".tableWriter" extension looks like a rename artifact (".edges"?).
            // It is kept unchanged because consumers only use the path stored in info.Edges.
            using (FileStream eFS = RUtil.CreateCacheFile(Name + ".tableWriter"))
            using (TextWriter edges = new StreamWriter(eFS))
            {
                info.Edges = eFS.Name;
                edges.Write(Edges);
            }

            // write links
            using (FileStream lFS = RUtil.CreateCacheFile(Name + ".links"))
            using (TextWriter links = new StreamWriter(lFS))
            {
                info.Links = lFS.Name;
                links.Write(Links);
            }

            // return info
            return(info);
        }
示例#5
0
        /// <summary>
        /// Prepares the LDA run: registers the base script, builds and saves the corpus,
        /// reserves the output file and collects the argument list for the script.
        /// </summary>
        public override void PreCompute()
        {
            RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

            LDACorpusInfo savedCorpus = new LDACorpus("LDA", _source, _target).Save();
            _outputFile = RUtil.ReserveCacheFile("LDA.out");

            // Argument order: corpus, vocab and edges files, output file, then the model settings.
            _arguments = new List <object>
            {
                savedCorpus.Corpus,
                savedCorpus.Vocab,
                savedCorpus.Edges,
                _outputFile,
                _config.NumTopics,
                _config.NumIterations,
                _config.Alpha,
                _config.Eta,
                _config.PredictionBeta,
                _config.Seed
            };
        }
示例#6
0
 /// <summary>
 /// Prepares the RTM run: registers the base script, saves the corpus held in
 /// _corpus, reserves the output file and collects the argument list for the script.
 /// </summary>
 public override void PreCompute()
 {
     RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);

     _info       = _corpus.Save();
     _outputFile = RUtil.ReserveCacheFile("RTM.out");

     // Argument order: corpus, vocab, edges and links files, output file, then the model settings.
     _arguments = new List <object>
     {
         _info.Corpus,
         _info.Vocab,
         _info.Edges,
         _info.Links,
         _outputFile,
         _config.NumTopics,
         _config.NumIterations,
         _config.Alpha,
         _config.Eta,
         _config.RTMBeta,
         _config.PredictionBeta,
         _config.Seed
     };
 }
示例#7
0
        /// <summary>
        /// Creates a table of results for input into PCA.
        /// </summary>
        /// <remarks>
        /// The table is tab-separated. The header row is "M1".."Mn", one column per matrix.
        /// Each following row starts with "sourceId_targetId" for a link of the first matrix,
        /// followed by the score of that link in every matrix. Numbers are written with the
        /// invariant culture so the decimal separator is always '.'.
        /// </remarks>
        /// <param name="matrices">Array of matrices (at least two, all with the same link count)</param>
        /// <returns>Path of the cache file holding the table</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="matrices"/> is null.</exception>
        /// <exception cref="RDataException">
        /// If fewer than two matrices are given or their link counts differ.
        /// </exception>
        private string CreateTable(params TLSimilarityMatrix[] matrices)
        {
            if (matrices == null)
            {
                throw new ArgumentNullException("matrices");
            }
            if (matrices.Length < 2)
            {
                throw new RDataException("Must have at least 2 matrices.");
            }
            // Validate everything before touching the cache so a bad input neither
            // leaks an open handle nor leaves a partial table behind.
            for (int i = 1; i < matrices.Length; i++)
            {
                if (matrices[i].Count != matrices[0].Count)
                {
                    throw new RDataException("Matrices have different count of links.");
                }
            }

            // The table is parsed by a script, so formatting must not depend on the
            // current culture of the machine.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string tablePath;

            using (FileStream tableFile = RUtil.CreateCacheFile("PCA.table"))
            using (TextWriter tableWriter = new StreamWriter(tableFile))
            {
                tablePath = tableFile.Name;

                // header row
                tableWriter.Write("M1");
                for (int i = 1; i < matrices.Length; i++)
                {
                    tableWriter.Write(String.Format(invariant, "\tM{0}", i + 1));
                }
                tableWriter.WriteLine();

                // one row per link of the first matrix
                foreach (TLSingleLink link in matrices[0].AllLinks)
                {
                    tableWriter.Write(String.Format(invariant, "{0}_{1}\t{2}",
                                                    link.SourceArtifactId,
                                                    link.TargetArtifactId,
                                                    link.Score
                                                    ));
                    for (int i = 1; i < matrices.Length; i++)
                    {
                        tableWriter.Write(String.Format(invariant, "\t{0}", matrices[i].GetScoreForLink(link.SourceArtifactId, link.TargetArtifactId)));
                    }
                    tableWriter.WriteLine();
                }
            }

            return(tablePath);
        }
示例#8
0
        /// <summary>
        /// Writes the adjacency matrix of the graph held in _pdg to a cache file, one
        /// space-separated row per node, and writes the method name of every node to a
        /// separate mapping file in the same order.
        /// </summary>
        /// <remarks>
        /// A cell is 1 (Binary weighting) or the edge weight (Frequency weighting) when the
        /// row node has an outgoing edge to the column node, and 0 otherwise.
        /// Sets _mappingFile to the path of the mapping file. Numbers are written with the
        /// invariant culture so the decimal separator is always '.'.
        /// </remarks>
        /// <returns>Path of the cache file holding the matrix</returns>
        /// <exception cref="RDataException">
        /// If an edge points to a node that _pdg cannot resolve to an index, or if the
        /// configured weighting scheme is unknown.
        /// </exception>
        private string GenerateAdjacencyMatrix()
        {
            int      n         = _pdg.Nodes.Count();
            double[] rowValues = new double[n];

            // The matrix is parsed by a script, so formatting must not depend on the
            // current culture of the machine.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string matrixPath;

            // using blocks release both files even when an RDataException is thrown mid-way.
            using (FileStream matrixFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.matrix"))
            using (TextWriter matrixWriter = new StreamWriter(matrixFS))
            using (FileStream mapFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.map"))
            using (TextWriter mapWriter = new StreamWriter(mapFS))
            {
                matrixPath   = matrixFS.Name;
                _mappingFile = mapFS.Name;

                for (int nodeIndex = 0; nodeIndex < n; nodeIndex++)
                {
                    PDGNode pdgNode = _pdg.GetNode(nodeIndex);

                    // the row buffer is reused, so reset it for every node
                    Array.Clear(rowValues, 0, n);

                    foreach (PDGEdge pdgOutgoingEdge in pdgNode.OutgoingEdges)
                    {
                        int column = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);

                        // a negative index means the edge target is not a known node
                        if (column < 0)
                        {
                            throw new RDataException("Invalid column index.");
                        }
                        if (_config.Weight == WebMiningWeightEnum.Binary)
                        {
                            rowValues[column] = 1;
                        }
                        else if (_config.Weight == WebMiningWeightEnum.Frequency)
                        {
                            rowValues[column] = pdgOutgoingEdge.Weight;
                        }
                        else
                        {
                            throw new RDataException("Unknown weighting scheme: " + _config.Weight);
                        }
                    }

                    string[] cells = Array.ConvertAll(rowValues, v => v.ToString(invariant));
                    matrixWriter.WriteLine(String.Join(" ", cells));
                    mapWriter.WriteLine(pdgNode.MethodName);
                }
            }

            return(matrixPath);
        }
示例#9
0
        /// <summary>
        /// Writes a matrix for the graph held in _pdg to a cache file, one space-separated
        /// row per node, together with two companion files: the number of outgoing edges
        /// per node and the method name per node, both in the same node order.
        /// </summary>
        /// <remarks>
        /// A node without outgoing edges gets a row filled with 1/n, where n is the number
        /// of nodes. Every other node gets the edge weight in the column of each edge
        /// target and 0 elsewhere. Sets _mappingFile to the path of the method-name file.
        /// Numbers are written with the invariant culture so the decimal separator is
        /// always '.'.
        /// </remarks>
        /// <returns>Path of the cache file holding the matrix</returns>
        /// <exception cref="RDataException">
        /// If an edge points to a node that _pdg cannot resolve to an index.
        /// </exception>
        private string GenerateTransitionProbabilityMatrix()
        {
            int    n            = _pdg.Nodes.Count();
            double defaultValue = 1.0 / n;

            double[] rowValues = new double[n];

            // The matrix is parsed by a script, so formatting must not depend on the
            // current culture of the machine.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string matrixPath;

            // using blocks release all three files even when an RDataException is thrown mid-way.
            using (FileStream matrixFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.matrix"))
            using (TextWriter matrixWriter = new StreamWriter(matrixFS))
            using (FileStream edgeFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.edges"))
            using (TextWriter edgeWriter = new StreamWriter(edgeFS))
            using (FileStream mapFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.map"))
            using (TextWriter mapWriter = new StreamWriter(mapFS))
            {
                matrixPath   = matrixFS.Name;
                _mappingFile = mapFS.Name;

                for (int nodeIndex = 0; nodeIndex < n; nodeIndex++)
                {
                    PDGNode pdgNode   = _pdg.GetNode(nodeIndex);
                    int     edgeCount = pdgNode.OutgoingEdges.Count();

                    // the row buffer is reused: nodes without outgoing edges get a
                    // uniform row, all others start from zero
                    double fill = (edgeCount == 0) ? defaultValue : 0.0;
                    for (int i = 0; i < n; i++)
                    {
                        rowValues[i] = fill;
                    }

                    // write number of outgoing edges for most of the advanced PageRank algorithms
                    edgeWriter.WriteLine(edgeCount);

                    foreach (PDGEdge pdgOutgoingEdge in pdgNode.OutgoingEdges)
                    {
                        int column = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);

                        // a negative index means the edge target is not a known node
                        if (column < 0)
                        {
                            throw new RDataException("Invalid column index.");
                        }
                        rowValues[column] = pdgOutgoingEdge.Weight;
                    }

                    string[] cells = Array.ConvertAll(rowValues, v => v.ToString(invariant));
                    matrixWriter.WriteLine(String.Join(" ", cells));
                    mapWriter.WriteLine(pdgNode.MethodName);
                }
            }

            return(matrixPath);
        }