Example #1
0
        /// <summary>
        /// Writes a remapped entry to the output writer kept for <paramref name="chrom"/>,
        /// creating that writer (file name <c>{chrom}.vcf.gz</c>) the first time the chromosome is seen.
        /// </summary>
        /// <param name="chrom">Chromosome name; written as the first column and used as the file name prefix.</param>
        /// <param name="pos">Remapped position; its absolute value is written as the second column.</param>
        /// <param name="vcfLine">Tab-delimited input line; everything after its first two columns is copied through unchanged.</param>
        private void WriteRemappedEntry(string chrom, int pos, string vcfLine)
        {
            // one dictionary lookup instead of ContainsKey followed by two indexer reads
            if (!_writers.TryGetValue(chrom, out var writer))
            {
                writer          = GZipUtilities.GetStreamWriter(chrom + ".vcf.gz");
                _writers[chrom] = writer;
            }

            // split into at most three pieces: first column, second column, untouched remainder
            var splits = vcfLine.Split('\t', 3);

            writer.WriteLine($"{chrom}\t{Math.Abs(pos)}\t{splits[2]}");
        }
Example #2
0
        /// <summary>
        /// Opens <paramref name="filePath"/> for reading and consumes its first line.
        /// </summary>
        /// <param name="filePath">Path of the input file.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public HgncReader(string filePath)
        {
            bool inputExists = File.Exists(filePath);

            if (!inputExists)
            {
                // NOTE(review): the message mentions "gene_info" although this is HgncReader - confirm wording
                string message = $"The specified gene_info file ({filePath}) does not exist.";
                throw new FileNotFoundException(message);
            }

            _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

            // the first line (header) is read and discarded
            _reader.ReadLine();
        }
        /// <summary>
        /// Opens <paramref name="filePath"/> with a binary reader and stores the header
        /// returned by <c>GetHeader</c>.
        /// </summary>
        /// <param name="filePath">Path of the protein function prediction file.</param>
        /// <param name="description">Description forwarded to <c>GetHeader</c>.</param>
        /// <param name="fileType">File type forwarded to <c>GetHeader</c>.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public TempPredictionReader(string filePath, string description, GlobalImportCommon.FileType fileType)
        {
            bool inputExists = File.Exists(filePath);

            if (!inputExists)
            {
                string message = $"The specified protein function prediction file ({filePath}) does not exist.";
                throw new FileNotFoundException(message);
            }

            // the reader must be assigned first: GetHeader receives it as an argument
            _reader = GZipUtilities.GetAppropriateBinaryReader(filePath);
            Header  = GetHeader(description, filePath, fileType, _reader);
        }
Example #4
0
        /// <summary>
        /// Opens <paramref name="filePath"/> and passes the reader to <c>VepReaderCommon.GetHeader</c>
        /// using the "regulatory element" description and the <c>Regulatory</c> file type.
        /// </summary>
        /// <param name="filePath">Path of the input file.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public VepRegulatoryReader(string filePath)
        {
            bool inputExists = File.Exists(filePath);

            if (!inputExists)
            {
                // NOTE(review): the message says "exon file" although this reader handles regulatory elements - confirm wording
                string message = $"The specified exon file ({filePath}) does not exist.";
                throw new FileNotFoundException(message);
            }

            _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

            // the return value of GetHeader is intentionally not kept here
            VepReaderCommon.GetHeader("regulatory element", filePath, GlobalImportCommon.FileType.Regulatory, _reader);
        }
Example #5
0
        /// <summary>
        /// Builds a <c>VcfReader</c> over either standard input or the file at <paramref name="vcfPath"/>.
        /// </summary>
        /// <param name="vcfPath">Path of the VCF file, or "-" to read from standard input.</param>
        /// <param name="chromosomeDictionary">Forwarded to the <c>VcfReader</c> constructor.</param>
        /// <param name="refMinorProvider">Forwarded to the <c>VcfReader</c> constructor.</param>
        /// <param name="verboseTranscript">Forwarded to the <c>VcfReader</c> constructor.</param>
        /// <param name="recomposer">Forwarded to the <c>VcfReader</c> constructor.</param>
        /// <returns>The newly created reader.</returns>
        public static IVcfReader GetVcfReader(string vcfPath, IDictionary<string, IChromosome> chromosomeDictionary,
                                              IRefMinorProvider refMinorProvider, bool verboseTranscript, IRecomposer recomposer)
        {
            // a single dash selects standard input instead of a file on disk
            bool readFromStdIn = vcfPath == "-";

            var inputStream = readFromStdIn
                ? Console.OpenStandardInput()
                : GZipUtilities.GetAppropriateReadStream(vcfPath);

            var peekableInput = new PeekStream(inputStream);

            return new VcfReader(peekableInput, chromosomeDictionary, refMinorProvider, verboseTranscript, recomposer);
        }
Example #6
0
        /// <summary>
        /// Opens <paramref name="filePath"/> and passes the reader to <c>VepReaderCommon.GetHeader</c>
        /// together with the supplied description and file type.
        /// </summary>
        /// <param name="filePath">Path of the input file.</param>
        /// <param name="description">Description forwarded to <c>VepReaderCommon.GetHeader</c>.</param>
        /// <param name="fileType">File type forwarded to <c>VepReaderCommon.GetHeader</c>.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public VepSimpleIntervalReader(string filePath, string description, GlobalImportCommon.FileType fileType)
        {
            bool inputExists = File.Exists(filePath);

            if (!inputExists)
            {
                // NOTE(review): the message always says "intron file" regardless of the description passed in - confirm wording
                string message = $"The specified intron file ({filePath}) does not exist.";
                throw new FileNotFoundException(message);
            }

            _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

            // the return value of GetHeader is intentionally not kept here
            VepReaderCommon.GetHeader(description, filePath, fileType, _reader);
        }
Example #7
0
        /// <summary>
        /// Reads the input file with a <c>MitoHeteroplasmyParser</c> and writes the header line
        /// followed by every parser output line to <c>OutFileName</c> in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once all lines have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            using (var parser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile)))
            using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, OutFileName)))
            using (var outputWriter = new StreamWriter(outputStream))
            {
                outputWriter.WriteLine(HeaderLine);

                foreach (var outputLine in parser.GetOutputLines())
                {
                    outputWriter.WriteLine(outputLine);
                }
            }

            return ExitCodes.Success;
        }
Example #8
0
        /// <summary>
        /// Remembers <paramref name="filePath"/>, opens it for reading and stores the header
        /// returned by <c>GetHeader</c>.
        /// </summary>
        /// <param name="filePath">Path of the transcripts file.</param>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public VepTranscriptReader(string filePath)
        {
            bool inputExists = File.Exists(filePath);

            if (!inputExists)
            {
                string message = $"The specified transcripts file ({filePath}) does not exist.";
                throw new FileNotFoundException(message);
            }

            // both fields are assigned before GetHeader() runs; keep this order
            // (GetHeader takes no arguments, so it presumably reads them - confirm)
            _filePath = filePath;
            _reader   = GZipUtilities.GetAppropriateStreamReader(filePath);

            Header = GetHeader();
        }
Example #9
0
        /// <summary>
        /// Copies the non-SNV records of the ClinVar input to <c>IsisClinicalIndelFileName</c>,
        /// keeping only CHROM, POS, ID, REF and ALT and writing "." for the remaining three columns.
        /// Header lines are passed to <c>ProcessHeaderLine</c>. Does nothing when no ClinVar reader is set.
        /// </summary>
        private void ExtractFromClinVar()
        {
            if (_clinvarReader == null)
            {
                return;
            }

            using (var writer = GZipUtilities.GetStreamWriter(IsisClinicalIndelFileName))
            {
                string line;
                while ((line = _clinvarReader.ReadLine()) != null)
                {
                    // Skip empty lines.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // Header lines: the line is non-empty here, so an ordinal first-character
                    // test is safe and avoids the culture-sensitive string StartsWith overload.
                    if (line[0] == '#')
                    {
                        ProcessHeaderLine(writer, line);
                        continue;
                    }

                    var fields = line.Split('\t');

                    // only non-SNV records are extracted
                    if (IsSnv(fields[VcfCommon.RefIndex], fields[VcfCommon.AltIndex]))
                    {
                        continue;
                    }

                    // the counter includes records that are skipped by the mitochondrial filter below
                    _clinvarCount++;
                    var chrName = GetChrName(fields[VcfCommon.ChromIndex]);

                    // skip mito for hg19
                    if (_assembly == GenomeAssembly.hg19 && (chrName == "chrM" || chrName == "MT"))
                    {
                        continue;
                    }

                    writer.Write(chrName + '\t' +
                                 fields[VcfCommon.PosIndex] + '\t' +
                                 fields[VcfCommon.IdIndex] + '\t' +
                                 fields[VcfCommon.RefIndex] + '\t' +
                                 fields[VcfCommon.AltIndex] + '\t' +
                                 ".\t.\t.\n");
                }
            }
        }
Example #10
0
        /// <summary>
        /// Writes every Genbank entry to <c>GenbankFilePath</c> and reports progress through the logger.
        /// </summary>
        /// <param name="logger">Receives the progress message before writing and "finished." afterwards.</param>
        /// <param name="entries">Entries to write, in enumeration order.</param>
        private static void WriteDictionary(ILogger logger, IEnumerable<GenbankEntry> entries)
        {
            var placeholderHeader = new IntermediateIoHeader(0, 0, Source.None, GenomeAssembly.Unknown, 0);

            string genbankFileName = Path.GetFileName(GenbankFilePath);
            logger.Write($"- writing Genbank file ({genbankFileName})... ");

            // the writer is disposed before the completion message is logged
            using (var genbankWriter = new GenbankWriter(GZipUtilities.GetStreamWriter(GenbankFilePath), placeholderHeader))
            {
                foreach (var genbankEntry in entries)
                {
                    genbankWriter.Write(genbankEntry);
                }
            }

            logger.WriteLine("finished.");
        }
Example #11
0
        /// <summary>
        /// Loads the RefSeq genes from the two GFF files and builds a <c>RefSeqGff</c> from
        /// a gene-ID-to-genes lookup and a gene-ID-to-symbol lookup.
        /// </summary>
        /// <param name="gcfGffPath">Path of the first GFF file passed to <c>LoadRefSeqGffGenes</c>.</param>
        /// <param name="refGffPath">Path of the second GFF file passed to <c>LoadRefSeqGffGenes</c>.</param>
        /// <param name="accessionToChromosome">Forwarded to <c>LoadRefSeqGffGenes</c>.</param>
        /// <returns>The populated <c>RefSeqGff</c>.</returns>
        public static RefSeqGff Create(string gcfGffPath, string refGffPath, IDictionary<string, IChromosome> accessionToChromosome)
        {
            // NOTE(review): neither reader is disposed here - presumably LoadRefSeqGffGenes owns them; confirm
            var gcfGffReader = GZipUtilities.GetAppropriateStreamReader(gcfGffPath);
            var refGffReader = GZipUtilities.GetAppropriateStreamReader(refGffPath);

            var refSeqGenes = LoadRefSeqGffGenes(gcfGffReader, refGffReader, accessionToChromosome);

            // group by gene ID, flatten, then group the flattened genes by gene ID again
            var genesGroupedById   = refSeqGenes.GetMultiValueDict(gene => gene.GeneId);
            var flattenedGenes     = genesGroupedById.FlattenGeneList();
            var entrezGeneIdToGene = flattenedGenes.GetMultiValueDict(gene => gene.GeneId);

            var entrezGeneIdToSymbol = refSeqGenes.GetKeyValueDict(gene => gene.GeneId, gene => gene.Symbol);

            return new RefSeqGff(entrezGeneIdToGene, entrezGeneIdToSymbol);
        }
Example #12
0
        /// <summary>
        /// Creates one <c>GnomadReader</c> per file and concatenates their item sequences in file order.
        /// </summary>
        /// <param name="filePaths">Input files, processed in enumeration order.</param>
        /// <param name="referenceProvider">Forwarded to every <c>GnomadReader</c>.</param>
        /// <returns>
        /// The concatenated items, or <c>null</c> when <paramref name="filePaths"/> yields no paths.
        /// </returns>
        private static IEnumerable<ISupplementaryDataItem> GetItems(IEnumerable<string> filePaths,
                                                                     ISequenceProvider referenceProvider)
        {
            // stays null when there are no files; callers must be prepared for that
            IEnumerable<ISupplementaryDataItem> combinedItems = null;

            foreach (string path in filePaths)
            {
                // NOTE(review): the stream readers opened here are never disposed in this method - confirm ownership
                var gnomadReader = new GnomadReader(GZipUtilities.GetAppropriateStreamReader(path), referenceProvider);
                var fileItems    = gnomadReader.GetItems();

                if (combinedItems == null)
                {
                    combinedItems = fileItems;
                }
                else
                {
                    combinedItems = combinedItems.Concat(fileItems);
                }
            }

            return combinedItems;
        }
Example #13
0
        /// <summary>
        /// Annotates every position of the configured VCF file and writes the results to the
        /// configured output file, preceded by <c>OutHeader</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> when the whole VCF has been processed.</returns>
        /// <remarks>
        /// Any exception raised while annotating is rethrown after the current VCF line has been
        /// attached to its <c>Data</c> dictionary under <c>ExitCodeUtilities.VcfLine</c>.
        /// </remarks>
        private ExitCodes ProgramExecution()
        {
            var sequenceProvider             = ProviderUtilities.GetSequenceProvider(ConfigurationSettings.RefSequencePath);
            var transcriptAnnotationProvider =
                ProviderUtilities.GetTranscriptAnnotationProvider(ConfigurationSettings.InputCachePrefix, sequenceProvider);

            var annotator = ProviderUtilities.GetAnnotator(transcriptAnnotationProvider, sequenceProvider);

            using (var outputWriter = new StreamWriter(ConfigurationSettings.OutputFileName))
            using (var vcfReader = new VcfReader(GZipUtilities.GetAppropriateReadStream(ConfigurationSettings.VcfPath), sequenceProvider.GetChromosomeDictionary(), null, false))
            {
                try
                {
                    // Parentheses make the existing precedence explicit:
                    // (rCRS and GRCh37) or GRCh38 or the force flag.
                    bool enableMitochondrialAnnotation =
                        (vcfReader.IsRcrsMitochondrion && annotator.GenomeAssembly == GenomeAssembly.GRCh37) ||
                        annotator.GenomeAssembly == GenomeAssembly.GRCh38 ||
                        ConfigurationSettings.ForceMitochondrialAnnotation;

                    if (enableMitochondrialAnnotation)
                    {
                        annotator.EnableMitochondrialAnnotation();
                    }

                    int       previousChromIndex = -1;
                    IPosition position;
                    outputWriter.WriteLine(OutHeader);

                    while ((position = vcfReader.GetNextPosition()) != null)
                    {
                        previousChromIndex = UpdatePerformanceMetrics(previousChromIndex, position.Chromosome);

                        var annotatedPosition = annotator.Annotate(position);
                        WriteAnnotatedPostion(annotatedPosition, outputWriter);
                    }
                }
                catch (Exception e)
                {
                    // record the offending VCF line, then rethrow with the original stack trace
                    e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine;
                    throw;
                }
            }

            return ExitCodes.Success;
        }
Example #14
0
        /// <summary>
        /// Scans a GFF file and collects, per chromosome index, the set of positions produced by
        /// <c>ProcessBufferedBoundaries</c> from the exon intervals of each transcript.
        /// </summary>
        /// <param name="gffFile">Path of the tab-delimited GFF file.</param>
        /// <returns>
        /// An array of <c>NumChrs</c> sets indexed by chromosome index; chromosomes that do not
        /// appear in the file keep an empty set.
        /// </returns>
        private static HashSet<int>[] GetIntronFlankingRegionStarts(string gffFile)
        {
            var flankingRegionStarts = new HashSet<int>[NumChrs];

            for (var i = 0; i < NumChrs; i++)
            {
                flankingRegionStarts[i] = new HashSet<int>();
            }

            using (var gffReader = GZipUtilities.GetAppropriateStreamReader(gffFile))
            {
                string line;
                // ushort.MaxValue marks "no gene seen yet"
                var previousChrIndex            = ushort.MaxValue;
                var exonBoundaries              = new List<Interval>();
                var flankingRegionStartsthisChr = new HashSet<int>();

                while ((line = gffReader.ReadLine()) != null)
                {
                    var info = line.Split('\t');

                    if (info[GffFeatureColumn] == "gene")
                    {
                        ushort chrIndex = GetChrIndex(info[GffChrColumn]);

                        if (previousChrIndex != ushort.MaxValue && chrIndex != previousChrIndex)
                        {
                            // chromosome changed: flush the last transcript of the previous chromosome
                            ProcessBufferedBoundaries(exonBoundaries, flankingRegionStartsthisChr);
                            flankingRegionStarts[previousChrIndex] = flankingRegionStartsthisChr;
                            flankingRegionStartsthisChr            = new HashSet<int>();

                            // Start the new chromosome with an empty buffer; otherwise the next
                            // "transcript" line would process the previous chromosome's exons a
                            // second time, into the new chromosome's set.
                            exonBoundaries = new List<Interval>();
                        }

                        previousChrIndex = chrIndex;
                    }
                    else if (info[GffFeatureColumn] == "transcript")
                    {
                        // a new transcript begins: flush the exons buffered for the previous one
                        ProcessBufferedBoundaries(exonBoundaries, flankingRegionStartsthisChr);
                        exonBoundaries = new List<Interval>();
                    }
                    else if (info[GffFeatureColumn] == "exon")
                    {
                        int start = int.Parse(info[GffStartColumn]);
                        int end   = int.Parse(info[GffEndColumn]);
                        exonBoundaries.Add(new Interval(start, end));
                    }
                }

                // flush whatever is still buffered for the last chromosome
                if (previousChrIndex != ushort.MaxValue)
                {
                    ProcessBufferedBoundaries(exonBoundaries, flankingRegionStartsthisChr);
                    flankingRegionStarts[previousChrIndex] = flankingRegionStartsthisChr;
                }
            }

            return flankingRegionStarts;
        }
Example #15
0
        /// <summary>
        /// Either creates the index for the input JSON file, or runs exactly one query action
        /// against it: list chromosomes and sections, print the header, print a section, or
        /// process the requested query regions.
        /// </summary>
        /// <returns>
        /// <c>ExitCodes.BadArguments</c> when no action flag is set and no query regions were
        /// given; <c>ExitCodes.Success</c> otherwise.
        /// </returns>
        private static ExitCodes ProgramExecution()
        {
            if (_createIndex)
            {
                using (var creator = new IndexCreator(_inputJson))
                {
                    creator.CreateIndex();
                }

                return ExitCodes.Success;
            }

            string indexPath = _inputJson + JasixCommons.FileExt;
            ValidateIndexFile(indexPath);

            // no output file means the query processor receives a null writer
            bool hasOutputFile = !string.IsNullOrEmpty(_outputFile);
            var  outputWriter  = hasOutputFile ? GZipUtilities.GetStreamWriter(_outputFile) : null;

            var exitCode = ExitCodes.Success;

            using (var processor = new QueryProcessor(GZipUtilities.GetAppropriateStreamReader(_inputJson),
                                                      FileUtilities.GetReadStream(indexPath), outputWriter))
            {
                // the first matching action wins; the order of the checks is significant
                if (_list)
                {
                    processor.ListChromosomesAndSections();
                }
                else if (_printHeaderOnly)
                {
                    processor.PrintHeaderOnly();
                }
                else if (!string.IsNullOrEmpty(_section))
                {
                    processor.PrintSection(_section);
                }
                else if (Queries == null)
                {
                    Console.WriteLine("Please specify query region(s)");
                    exitCode = ExitCodes.BadArguments;
                }
                else
                {
                    processor.ProcessQuery(Queries, _printHeader);
                }
            }

            return exitCode;
        }
Example #16
0
        /// <summary>
        /// Converts a custom annotation file: writes the small variants together with their
        /// index and JSON schema files, then, when intervals are present, writes the interval
        /// file and its JSON schema.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        /// <remarks>
        /// When no small-variant item was written, the three small-variant output files are
        /// deleted. This cleanup also runs when the input has no intervals; previously the
        /// early return for that case skipped it and left the files behind.
        /// </remarks>
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            List<CustomInterval> intervals;
            SaJsonSchema         intervalJsonSchema;
            string               jsonTag;
            DataSourceVersion    version;
            string               outputPrefix      = GetOutputPrefix(_inputFile);
            string               nsaFileName       = Path.Combine(_outputDirectory, outputPrefix + SaCommon.SaFileSuffix);
            string               nsaIndexFileName  = nsaFileName + SaCommon.IndexSufix;
            string               nsaSchemaFileName = nsaFileName + SaCommon.JsonSchemaSuffix;
            ReportFor            reportFor;

            var nsaItemCount = 0;

            using (var parser = VariantAnnotationsParser.Create(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
            using (var nsaStream = FileUtilities.GetCreateStream(nsaFileName))
            using (var indexStream = FileUtilities.GetCreateStream(nsaIndexFileName))
            using (var nsaWriter = CaUtilities.GetNsaWriter(nsaStream, indexStream, parser, CaUtilities.GetInputFileName(_inputFile), referenceProvider, out version))
            using (var saJsonSchemaStream = FileUtilities.GetCreateStream(nsaSchemaFileName))
            using (var schemaWriter = new StreamWriter(saJsonSchemaStream))
            {
                (jsonTag, nsaItemCount, intervalJsonSchema, intervals) = CaUtilities.WriteSmallVariants(parser, nsaWriter, schemaWriter);
                reportFor = parser.ReportFor;
            }

            // all small-variant streams are closed here, so the empty outputs can be removed
            if (nsaItemCount == 0)
            {
                File.Delete(nsaFileName);
                File.Delete(nsaIndexFileName);
                File.Delete(nsaSchemaFileName);
            }

            // nothing more to write when the input has no intervals
            if (intervals == null)
            {
                return ExitCodes.Success;
            }

            using (var nsiStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix)))
            using (var nsiWriter = CaUtilities.GetNsiWriter(nsiStream, version, referenceProvider.Assembly, jsonTag, reportFor))
            using (var siJsonSchemaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, outputPrefix + SaCommon.IntervalFileSuffix + SaCommon.JsonSchemaSuffix)))
            using (var schemaWriter = new StreamWriter(siJsonSchemaStream))
            {
                nsiWriter.Write(intervals);
                schemaWriter.Write(intervalJsonSchema);
            }

            return ExitCodes.Success;
        }
Example #17
0
        /// <summary>
        /// Parses the ClinVar XML file referenced by <c>_clinVarXmlFileInfo</c> and lazily yields
        /// every ClinVar item extracted from it.
        /// </summary>
        /// <returns>The extracted items, produced one at a time as the XML is read.</returns>
        /// <exception cref="InvalidDataException">
        /// An end element is encountered whose name is neither "ReleaseSet" nor the name of the
        /// most recently seen start element.
        /// </exception>
        private IEnumerable <ClinVarItem> GetItems()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_clinVarXmlFileInfo.FullName))
                // DTD processing is prohibited and whitespace nodes are ignored
                using (var xmlReader = XmlTextReader.Create(reader, new XmlReaderSettings {
                    DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true
                }))
                {
                    // name of the most recently seen start element; checked against end elements below
                    string elementName = null;

                    // skip the top level element and go down to its first "ClinVarSet" descendant
                    xmlReader.ReadToDescendant("ClinVarSet");

                    do
                    {
                        // stays null unless the current node is a start element
                        LiteXmlElement xmlElement = null;

                        switch (xmlReader.NodeType)
                        {
                        case XmlNodeType.Element:                         // start element: remember its name and parse it
                            elementName = xmlReader.Name;
                            xmlElement  = ParseXmlElement(xmlReader);
                            break;

                        case XmlNodeType.EndElement:                         // end element: only validated, nothing is parsed
                            // ReleaseSet is the top level element we skipped, so its end tag is an expected mismatch.
                            if (xmlReader.Name != "ReleaseSet" && xmlReader.Name != elementName)
                            {
                                throw new InvalidDataException("WARNING!! encountered unexpected endElement tag:" + xmlReader.Name);
                            }
                            break;

                        default:
                            // any other node type: jump straight to the loop condition, which reads the next node
                            continue;
                        }

                        // for end elements xmlElement is null here
                        var clinVarItems = ExtractClinVarItems(xmlElement);

                        if (clinVarItems == null)
                        {
                            continue;
                        }

                        foreach (var clinVarItem in clinVarItems)
                        {
                            yield return(clinVarItem);
                        }
                    } while (xmlReader.Read());
                }
        }
Example #18
0
        /// <summary>
        /// Writes a VCF header with <c>LiteVcfWriter</c> to a temporary file, reads it back and
        /// checks that the leading header lines match the expected ones.
        /// </summary>
        /// <remarks>
        /// Release dates are built with the <c>DateTime</c> constructor rather than parsed from
        /// "MM/dd/yyyy" strings, so the test does not depend on the current culture. The temporary
        /// file is removed even when an assertion or the writer fails.
        /// </remarks>
        public void VcfHeaderCheck()
        {
            var randomPath         = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
            var outputVcfPath      = randomPath + ".vcf.gz";
            var dataSourceVersions = new List<DataSourceVersion>
            {
                new DataSourceVersion("VEP", "84", 0, "Ensembl"),
                new DataSourceVersion("dbSNP", "147", new DateTime(2016, 6, 1).Ticks),
                new DataSourceVersion("COSMIC", "78", new DateTime(2016, 9, 5).Ticks)
            };

            var observedHeader = new List<string>();

            try
            {
                // creating and immediately disposing the writer is enough to emit the header
                using (new LiteVcfWriter(outputVcfPath, new List<string> {
                    "##source=SpliceGirl 1.0.0.28", "##reference=file:/illumina/scratch/Zodiac/Software/Jenkins/R2/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta"
                }, "84.22.34", dataSourceVersions))
                {
                }

                using (var reader = GZipUtilities.GetAppropriateStreamReader(outputVcfPath))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        observedHeader.Add(line);
                    }
                }
            }
            finally
            {
                // File.Delete does not throw when the file is missing
                File.Delete(outputVcfPath);
            }

            var expectedHeader = new List<string>
            {
                "##source=SpliceGirl 1.0.0.28",
                "##reference=file:/illumina/scratch/Zodiac/Software/Jenkins/R2/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta",
                "##annotator=Illumina Annotation Engine",
                "##annotatorDataVersion=84.22.34",
                "##annotatorTranscriptSource=Ensembl",
                "##dataSource=dbSNP,version:147,release date:2016-06-01",
                "##dataSource=COSMIC,version:78,release date:2016-09-05"
            };

            // ten additional lines are expected for the info tags added by default
            Assert.Equal(expectedHeader.Count + 10, observedHeader.Count);

            for (var i = 0; i < expectedHeader.Count; i++)
            {
                // the annotator line is only required to contain the expected text
                if (expectedHeader[i].StartsWith("##annotator=Illumina"))
                {
                    Assert.Contains(expectedHeader[i], observedHeader[i]);
                    continue;
                }

                Assert.Equal(expectedHeader[i], observedHeader[i]);
            }
        }
Example #19
0
        /// <summary>
        /// Reads the ClinGen input file and writes its items with an <c>NsiWriter</c> to
        /// <c>{name}_{version}</c> plus <c>SaCommon.SiFileSuffix</c> in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var refProvider   = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");

            string outputBaseName = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string outputPath     = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SiFileSuffix);

            using (var reader = new ClinGenReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), refProvider.RefNameToChromosome))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var intervalWriter = new NsiWriter(new ExtendedBinaryWriter(outputStream), sourceVersion, refProvider.Assembly, SaCommon.ClinGenTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                intervalWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Example #20
0
        /// <summary>
        /// Parses the low-complexity-region input file and writes its items with an
        /// <c>NsiWriter</c> to <c>{name}_{version}</c> plus <c>SaCommon.LcrFileSuffix</c>
        /// in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_referenceSequencePath));
            var sourceVersion     = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            var outputBaseName = $"{sourceVersion.Name}_{sourceVersion.Version}";
            var outputPath     = Path.Combine(_outputDirectory, outputBaseName + SaCommon.LcrFileSuffix);

            using (var regionParser = new LcrRegionParser(GZipUtilities.GetAppropriateStreamReader(_inputFile), referenceProvider))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var intervalWriter = new NsiWriter(outputStream, sourceVersion, referenceProvider.Assembly, SaCommon.LowComplexityRegionTag, ReportFor.AllVariants, SaCommon.NsiSchemaVersion))
            {
                intervalWriter.Write(regionParser.GetItems());
            }

            return ExitCodes.Success;
        }
Example #21
0
        /// <summary>
        /// Parses the dosage map region file and writes its items with an <c>NsiWriter</c>.
        /// The output file name is the data source name (spaces replaced by underscores),
        /// an underscore, the version and <c>SaCommon.SiFileSuffix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_dosageMapRegionFile + ".version");

            string sanitizedName  = sourceVersion.Name.Replace(' ', '_');
            string outputBaseName = $"{sanitizedName}_{sourceVersion.Version}";

            var refProvider = new ReferenceSequenceProvider(GZipUtilities.GetAppropriateReadStream(_inputReferencePath));

            string outputPath = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SiFileSuffix);

            using (var regionParser = new DosageMapRegionParser(GZipUtilities.GetAppropriateReadStream(_dosageMapRegionFile), refProvider.RefNameToChromosome))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var intervalWriter = new NsiWriter(outputStream, sourceVersion, refProvider.Assembly, SaCommon.DosageSensitivityTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                intervalWriter.Write(regionParser.GetItems());
            }

            return ExitCodes.Success;
        }
Example #22
0
        /// <summary>
        /// Parses the dosage sensitivity file and writes its items with an <c>NgaWriter</c>.
        /// The output file name is the data source name (spaces replaced by underscores),
        /// an underscore, the version and <c>SaCommon.NgaFileSuffix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_dosageSensitivityFile + ".version");

            string sanitizedName  = sourceVersion.Name.Replace(' ', '_');
            string outputBaseName = $"{sanitizedName}_{sourceVersion.Version}";
            string outputPath     = Path.Combine(_outputDirectory, outputBaseName + SaCommon.NgaFileSuffix);

            using (var parser = new DosageSensitivityParser(GZipUtilities.GetAppropriateReadStream(_dosageSensitivityFile)))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var geneWriter = new NgaWriter(outputStream, sourceVersion, SaCommon.DosageSensitivityTag, SaCommon.SchemaVersion, false))
            {
                geneWriter.Write(parser.GetItems());
            }

            return ExitCodes.Success;
        }
        /// <summary>
        /// Lazily yields the annotation items of one reference sequence, starting at the
        /// file offset recorded for it in <c>_refNameOffsets</c>.
        /// </summary>
        /// <param name="refName">Reference name; lines are matched on the prefix <c>refName + "\t"</c>.</param>
        /// <returns>
        /// The items extracted from the consecutive run of lines belonging to
        /// <paramref name="refName"/>; empty when the name has no recorded offset or no matching line follows it.
        /// </returns>
        public IEnumerable<ISupplementaryInterval> GetItems(string refName)
        {
            // single lookup instead of ContainsKey followed by the indexer
            if (!_refNameOffsets.TryGetValue(refName, out var offset))
            {
                yield break;
            }

            // built once instead of on every line
            string refPrefix = refName + "\t";

            using (var reader = GZipUtilities.GetAppropriateStreamReader(_fileName))
            {
                reader.BaseStream.Position = offset;
                string line;

                // Advance to the first line of the requested reference. The recorded offset may
                // point a few lines before it (per the original author's note on GZipStream positions).
                while ((line = reader.ReadLine()) != null)
                {
                    // the line is non-empty after the whitespace check, so indexing is safe
                    if (string.IsNullOrWhiteSpace(line) || line[0] == '#')
                    {
                        continue;
                    }

                    if (line.StartsWith(refPrefix, System.StringComparison.Ordinal))
                    {
                        break;
                    }
                }

                if (line == null)
                {
                    yield break;
                }

                do
                {
                    // the first line of another reference ends the run
                    if (!line.StartsWith(refPrefix, System.StringComparison.Ordinal))
                    {
                        yield break;
                    }

                    var annotationItem = ExtractItem(line);
                    if (annotationItem == null)
                    {
                        // continue jumps to the loop condition, which reads the next line
                        continue;
                    }

                    yield return annotationItem;
                } while ((line = reader.ReadLine()) != null);
            }
        }
Example #24
0
        /// <summary>
        /// Loads the transcript cache for the input prefix and writes its transcript interval
        /// arrays to the output file through a <c>GffCreator</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            string transcriptCachePath = CacheConstants.TranscriptPath(_inputPrefix);

            // only the first of the three dictionaries is needed here
            var (refIndexToChromosome, _, _) = SequenceHelper.GetDictionaries(_compressedReferencePath);

            var transcriptCache  = TranscriptCacheHelper.GetCache(transcriptCachePath, refIndexToChromosome);
            var geneToInternalId = InternalGenes.CreateDictionary(transcriptCache.Genes);

            using (var gffWriter = new GffWriter(GZipUtilities.GetStreamWriter(_outputFileName)))
            {
                var gffCreator = new GffCreator(gffWriter, geneToInternalId);
                gffCreator.Create(transcriptCache.TranscriptIntervalArrays);
            }

            return ExitCodes.Success;
        }
Example #25
0
        /// <summary>
        /// Parses the ExAC scores input file and writes its items with an <c>NgaWriter</c> to
        /// <c>{name}_{version}</c> plus <c>SaCommon.NgaFileSuffix</c> in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string outputBaseName = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string outputPath     = Path.Combine(_outputDirectory, outputBaseName + SaCommon.NgaFileSuffix);

            using (var scoresParser = new ExacScoresParser(GZipUtilities.GetAppropriateStreamReader(_inputFile)))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var geneWriter = new NgaWriter(outputStream, sourceVersion, SaCommon.ExacScoreTag, SaCommon.SchemaVersion, false))
            {
                geneWriter.Write(scoresParser.GetItems());
            }

            return ExitCodes.Success;
        }
        /// <summary>
        /// Opens the optional CNV and breakend TSV inputs and lets a <c>CosmicSvReader</c>
        /// create the TSV output in <c>_outputDir</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private ExitCodes ProgramExecution()
        {
            // NOTE(review): the version file path is derived from _cnvTsv before the null check
            // below, so a null _cnvTsv yields the path ".version" - confirm intended behaviour
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_cnvTsv + ".version");
            var refProvider   = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));

            // each input is optional; a missing path results in a null stream
            var cnvInput      = _cnvTsv != null ? GZipUtilities.GetAppropriateReadStream(_cnvTsv) : null;
            var breakendInput = _breakendTsv != null ? GZipUtilities.GetAppropriateReadStream(_breakendTsv) : null;

            using (var svReader = new CosmicSvReader(cnvInput, breakendInput, sourceVersion, _outputDir,
                                                     refProvider.GenomeAssembly, refProvider.RefNameToChromosome))
            {
                svReader.CreateTsv();
            }

            return ExitCodes.Success;
        }
Example #27
0
        /// <summary>
        /// Reads the reference-minor input file and writes its items, plus an index file,
        /// with a <c>RefMinorDbWriter</c>. The base file name is
        /// <c>{name}_{version}_{SaCommon.RefMinorTag}</c> with spaces replaced by underscores.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var refProvider   = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string rawBaseName    = $"{sourceVersion.Name}_{sourceVersion.Version}_{SaCommon.RefMinorTag}";
            string outputBaseName = rawBaseName.Replace(' ', '_');

            string dataPath  = Path.Combine(_outputDirectory, outputBaseName + SaCommon.RefMinorFileSuffix);
            string indexPath = Path.Combine(_outputDirectory, outputBaseName + SaCommon.RefMinorFileSuffix + SaCommon.IndexSufix);

            using (var reader = new RefMinorReader(GZipUtilities.GetAppropriateStreamReader(_inputFile), refProvider))
            using (var dataStream = FileUtilities.GetCreateStream(dataPath))
            using (var indexStream = FileUtilities.GetCreateStream(indexPath))
            using (var dbWriter = new RefMinorDbWriter(new ExtendedBinaryWriter(dataStream), new ExtendedBinaryWriter(indexStream), sourceVersion, refProvider, SaCommon.SchemaVersion))
            {
                dbWriter.Write(reader.GetItems());
            }

            return ExitCodes.Success;
        }
Example #28
0
        /// <summary>
        /// Parses the gene-disease validity file and writes its items with an <c>NgaWriter</c>.
        /// The output file name is the data source name (spaces replaced by underscores),
        /// an underscore, the version and <c>SaCommon.GeneFileSuffix</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_diseaseValidityFile + ".version");

            string sanitizedName  = sourceVersion.Name.Replace(' ', '_');
            string outputBaseName = $"{sanitizedName}_{sourceVersion.Version}";
            string outputPath     = Path.Combine(_outputDirectory, outputBaseName + SaCommon.GeneFileSuffix);

            // the parser also receives the result of GetHgncIdToGeneSymbols()
            using (var validityParser = new GeneDiseaseValidityParser(GZipUtilities.GetAppropriateReadStream(_diseaseValidityFile), GetHgncIdToGeneSymbols()))
            using (var outputStream = FileUtilities.GetCreateStream(outputPath))
            using (var geneWriter = new NgaWriter(outputStream, sourceVersion, SaCommon.DiseaseValidityTag, SaCommon.SchemaVersion, true))
            {
                geneWriter.Write(validityParser.GetItems());
            }

            return ExitCodes.Success;
        }
Example #29
0
        /// <summary>
        /// Parses the mitochondrial heteroplasmy input file and writes its items, plus an
        /// index file, with an <c>NsaWriter</c> to <c>{name}_{version}</c> plus
        /// <c>SaCommon.SaFileSuffix</c> in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var refProvider   = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputFile + ".version");

            string outputBaseName = $"{sourceVersion.Name}_{sourceVersion.Version}";
            string dataPath       = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SaFileSuffix);
            string indexPath      = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

            using (var heteroplasmyParser = new MitoHeteroplasmyParser(GZipUtilities.GetAppropriateReadStream(_inputFile), refProvider))
            using (var dataStream = FileUtilities.GetCreateStream(dataPath))
            using (var indexStream = FileUtilities.GetCreateStream(indexPath))
            using (var annotationWriter = new NsaWriter(dataStream, indexStream, sourceVersion, refProvider, SaCommon.MitoHeteroplasmyTag, true, false, SaCommon.SchemaVersion, false))
            {
                annotationWriter.Write(heteroplasmyParser.GetItems());
            }

            return ExitCodes.Success;
        }
Example #30
0
        /// <summary>
        /// Reads the transcript cache for the input prefix and writes a VCF header followed by
        /// the output of <c>CreateVcf</c> for every transcript to <c>_outPath</c>.
        /// Progress is reported on the console.
        /// </summary>
        public void Create()
        {
            using (var cacheReader = new GlobalCacheReader(CacheConstants.TranscriptPath(_inputPrefix)))
            using (var vcfWriter = GZipUtilities.GetStreamWriter(_outPath))
            {
                // the header is written before the cache is read
                WriteVcfHeader(vcfWriter);

                var transcriptCache = cacheReader.Read();
                var transcripts     = transcriptCache.Transcripts;

                Console.Write("- found {0} transcripts... ", transcripts.Length);

                foreach (var currentTranscript in transcripts)
                {
                    CreateVcf(vcfWriter, currentTranscript);
                }

                Console.WriteLine("finished.");
            }
        }