/// <summary>
        /// executes the program
        /// </summary>
        protected override void ProgramExecution()
        {
            // the converter is constructed first, then the configured assembly name is
            // translated by GenomeAssemblyUtilities.Convert
            var fastaConverter = new FastaToCompressedConverter();
            var assembly       = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

            // collect the three input locations and the output location from the configuration
            var fastaPath           = ConfigurationSettings.InputFastaPath;
            var cytobandPath        = ConfigurationSettings.InputCytobandPath;
            var chromosomeNamesPath = ConfigurationSettings.InputChromosomeNamesPath;
            var compressedPath      = ConfigurationSettings.OutputCompressedPath;

            fastaConverter.Convert(fastaPath, cytobandPath, chromosomeNamesPath, compressedPath, assembly);
        }
        /// <summary>
        /// checks that GenomeAssemblyUtilities.Convert returns the expected GenomeAssembly value
        /// for differently-cased assembly names and throws for an unrecognized name
        /// </summary>
        public void GenomeAssemblyConvertTest()
        {
            // the inputs deliberately use mixed casing that differs from the enum member names
            Assert.Equal(GenomeAssembly.GRCh37, GenomeAssemblyUtilities.Convert("grCh37"));
            Assert.Equal(GenomeAssembly.GRCh38, GenomeAssemblyUtilities.Convert("GrCh38"));
            Assert.Equal(GenomeAssembly.hg19, GenomeAssemblyUtilities.Convert("HG19"));

            // an unknown assembly name must throw; any exception type satisfies the check
            // because the concrete type thrown by Convert is not part of this test's contract
            Assert.ThrowsAny<Exception>(() => { GenomeAssemblyUtilities.Convert("asldkjf"); });
        }
        /// <summary>
        /// executes the program
        /// </summary>
        protected override void ProgramExecution()
        {
            // RefSeq is used when ImportRefSeqTranscripts is set; otherwise Ensembl
            var transcriptSource = ConfigurationSettings.ImportRefSeqTranscripts ? TranscriptDataSource.RefSeq : TranscriptDataSource.Ensembl;

            var referenceIndex = new ReferenceIndex(ConfigurationSettings.InputReferencePath);
            var vepDirectories = referenceIndex.GetUcscKaryotypeOrder(ConfigurationSettings.InputVepDirectory);
            var cacheParser    = new VepCacheParser(transcriptSource);
            var genomeAssembly = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

            // ---------------------------------------------------------------
            // create the pre-cache files: every output shares the same stub
            // ---------------------------------------------------------------

            var outputStub = ConfigurationSettings.OutputStub;

            var transcriptPath = outputStub + ".transcripts.gz";
            var regulatoryPath = outputStub + ".regulatory.gz";
            var genePath       = outputStub + ".genes.gz";
            var intronPath     = outputStub + ".introns.gz";
            var exonPath       = outputStub + ".exons.gz";
            var mirnaPath      = outputStub + ".mirnas.gz";
            var siftPath       = outputStub + ".sift.dat";
            var polyphenPath   = outputStub + ".polyphen.dat";
            var cdnaPath       = outputStub + ".cdnas.gz";
            var peptidePath    = outputStub + ".peptides.gz";

            // number of VEP directories handed to the parser
            var processedCount = 0;

            // the SIFT and PolyPhen writers are binary and target ".tmp" files; all other writers are text
            using (var transcriptWriter = GZipUtilities.GetStreamWriter(transcriptPath))
            using (var regulatoryWriter = GZipUtilities.GetStreamWriter(regulatoryPath))
            using (var geneWriter       = GZipUtilities.GetStreamWriter(genePath))
            using (var intronWriter     = GZipUtilities.GetStreamWriter(intronPath))
            using (var exonWriter       = GZipUtilities.GetStreamWriter(exonPath))
            using (var mirnaWriter      = GZipUtilities.GetStreamWriter(mirnaPath))
            using (var siftWriter       = GZipUtilities.GetBinaryWriter(siftPath + ".tmp"))
            using (var polyphenWriter   = GZipUtilities.GetBinaryWriter(polyphenPath + ".tmp"))
            using (var cdnaWriter       = GZipUtilities.GetStreamWriter(cdnaPath))
            using (var peptideWriter    = GZipUtilities.GetStreamWriter(peptidePath))
            {
                // force "\n" line endings on every text writer
                transcriptWriter.NewLine = "\n";
                regulatoryWriter.NewLine = "\n";
                geneWriter.NewLine       = "\n";
                intronWriter.NewLine     = "\n";
                exonWriter.NewLine       = "\n";
                mirnaWriter.NewLine      = "\n";
                cdnaWriter.NewLine       = "\n";
                peptideWriter.NewLine    = "\n";

                // each output starts with a header describing its file type, transcript source and assembly
                WriteHeader(transcriptWriter, GlobalImportCommon.FileType.Transcript, transcriptSource, genomeAssembly);
                WriteHeader(regulatoryWriter, GlobalImportCommon.FileType.Regulatory, transcriptSource, genomeAssembly);
                WriteHeader(geneWriter,       GlobalImportCommon.FileType.Gene,       transcriptSource, genomeAssembly);
                WriteHeader(intronWriter,     GlobalImportCommon.FileType.Intron,     transcriptSource, genomeAssembly);
                WriteHeader(exonWriter,       GlobalImportCommon.FileType.Exon,       transcriptSource, genomeAssembly);
                WriteHeader(mirnaWriter,      GlobalImportCommon.FileType.MicroRna,   transcriptSource, genomeAssembly);
                WriteHeader(siftWriter,       GlobalImportCommon.FileType.Sift,       transcriptSource, genomeAssembly);
                WriteHeader(polyphenWriter,   GlobalImportCommon.FileType.PolyPhen,   transcriptSource, genomeAssembly);
                WriteHeader(cdnaWriter,       GlobalImportCommon.FileType.CDna,       transcriptSource, genomeAssembly);
                WriteHeader(peptideWriter,    GlobalImportCommon.FileType.Peptide,    transcriptSource, genomeAssembly);

                // Item1 is the reference sequence name, Item2 is what the parser receives as its dump directory
                foreach (var vepDirectory in vepDirectories)
                {
                    Console.WriteLine("Parsing reference sequence [{0}]:", vepDirectory.Item1);
                    processedCount++;

                    var sequenceIndex = referenceIndex.GetIndex(vepDirectory.Item1);

                    cacheParser.ParseDumpDirectory(sequenceIndex, vepDirectory.Item2,
                        transcriptWriter, regulatoryWriter, geneWriter, intronWriter, exonWriter,
                        mirnaWriter, siftWriter, polyphenWriter, cdnaWriter, peptideWriter);
                }
            }

            Console.WriteLine("\n{0} directories processed.", processedCount);

            cacheParser.DumpStatistics();
            Console.WriteLine();

            // convert the protein function predictions; the paths are passed without the ".tmp" suffix
            // NOTE(review): presumably the converter locates the ".tmp" files itself - confirm in PredictionConverter
            var predictions = new PredictionConverter(referenceIndex.NumReferenceSeqs);

            predictions.Convert(siftPath, "SIFT", GlobalImportCommon.FileType.Sift);
            predictions.Convert(polyphenPath, "PolyPhen", GlobalImportCommon.FileType.PolyPhen);
        }
        // Example no. 4 (separator left over from the code-listing page; the stray "0" was presumably a vote count)
        /// <summary>
        /// executes the program
        /// </summary>
        protected override void ProgramExecution()
        {
            Console.WriteLine("Reading file: {0}", ConfigurationSettings.InputWigFixFile);

            // NOTE(review): Benchmark presumably starts timing on construction - confirm
            var stopwatch   = new Benchmark();
            var dataVersion = GetDataVersion();
            var assembly    = GenomeAssemblyUtilities.Convert(ConfigurationSettings.GenomeAssembly);

            // the writer is disposed as soon as the scores have been extracted
            using (var phylopWriter = new PhylopWriter(ConfigurationSettings.InputWigFixFile, dataVersion, assembly,
                                                       ConfigurationSettings.OutputNirvanaDirectory))
            {
                phylopWriter.ExtractPhylopScores();
            }

            Console.WriteLine("Time:{0}", stopwatch.GetElapsedTime());
        }