コード例 #1
0
 /// <summary>
 /// Streams the source file line by line and lazily yields every item that
 /// <c>ExtractItems</c> returns for its data lines.
 /// </summary>
 /// <returns>A lazily evaluated sequence of the extracted items.</returns>
 private IEnumerable<ExacItem> GetExacItems()
 {
     using (var reader = GZipUtilities.GetAppropriateStreamReader(_exacFileInfo.FullName))
     {
         for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
         {
             // Blank rows and '#' comment rows carry no data.
             if (string.IsNullOrWhiteSpace(row) || row.StartsWith("#"))
             {
                 continue;
             }

             var extracted = ExtractItems(row);
             if (extracted == null)
             {
                 continue;
             }

             foreach (var item in extracted)
             {
                 yield return item;
             }
         }
     }
 }
コード例 #2
0
        /// <summary>
        /// Resolves the genome assembly from its name and opens a reader for every
        /// VCF path that was supplied.
        /// </summary>
        /// <param name="assembly">Assembly name; "GRCh37", "GRCh38" and "hg19" are accepted.</param>
        /// <param name="oneKGenomeVcf">Path handed to the reader factory; null or empty leaves that reader null.</param>
        /// <param name="clinvarVcf">Path handed to the reader factory; null or empty leaves that reader null.</param>
        /// <param name="cosmicVcf">Path handed to the reader factory; null or empty leaves that reader null.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="assembly"/> is not one of the accepted names.
        /// </exception>
        public MustGenotypeExtractor(string assembly, string oneKGenomeVcf, string clinvarVcf, string cosmicVcf)
        {
            switch (assembly)
            {
                case "GRCh37":
                    _assembly = GenomeAssembly.GRCh37;
                    break;

                case "GRCh38":
                    _assembly = GenomeAssembly.GRCh38;
                    break;

                case "hg19":
                    _assembly = GenomeAssembly.hg19;
                    break;

                default:
                    // The message lists every name the switch above accepts.
                    throw new ArgumentException("Genome assembly must be one of GRCh37, GRCh38 or hg19", nameof(assembly));
            }

            try
            {
                _oneKGenomeReader = string.IsNullOrEmpty(oneKGenomeVcf) ? null : GZipUtilities.GetAppropriateStreamReader(oneKGenomeVcf);
                _clinvarReader    = string.IsNullOrEmpty(clinvarVcf) ? null : GZipUtilities.GetAppropriateStreamReader(clinvarVcf);
                _cosmicReader     = string.IsNullOrEmpty(cosmicVcf) ? null : GZipUtilities.GetAppropriateStreamReader(cosmicVcf);
            }
            catch
            {
                // A later file failed to open: release the readers opened so far,
                // because the caller never receives an instance it could dispose.
                _oneKGenomeReader?.Dispose();
                _clinvarReader?.Dispose();
                throw;
            }
        }
コード例 #3
0
 /// <summary>
 /// Lazily yields the items that <c>ExtractOneKGenSvItem</c> produces for the
 /// data lines of the file referenced by <c>_oneKGenSvFile</c>.
 /// </summary>
 /// <returns>A lazily evaluated sequence of the non-null extracted items.</returns>
 private IEnumerable<OneKGenItem> GetOneKGenSvItems()
 {
     using (var reader = GZipUtilities.GetAppropriateStreamReader(_oneKGenSvFile.FullName))
     {
         while (true)
         {
             string current = reader.ReadLine();
             if (current == null)
             {
                 break;
             }

             // Only non-blank lines that are not '#' comments hold data.
             bool isDataLine = !string.IsNullOrWhiteSpace(current) && !current.StartsWith("#");
             if (!isDataLine)
             {
                 continue;
             }

             var svItem = ExtractOneKGenSvItem(current, _renamer);
             if (svItem != null)
             {
                 yield return svItem;
             }
         }
     }
 }
コード例 #4
0
ファイル: GeneInfoData.cs プロジェクト: wook2014/Nirvana
 /// <summary>
 /// Reads every gene from the file at <paramref name="filePath"/> through a
 /// <c>GeneInfoReader</c>.
 /// </summary>
 /// <param name="filePath">Path handed to the stream-reader factory.</param>
 /// <returns>The genes returned by the reader's <c>GetGenes</c> call.</returns>
 private static IEnumerable<GeneInfo> LoadGeneInfoGenes(string filePath)
 {
     using (var input = GZipUtilities.GetAppropriateStreamReader(filePath))
     using (var geneReader = new GeneInfoReader(input))
     {
         // The result is captured before either reader is disposed.
         GeneInfo[] loadedGenes = geneReader.GetGenes();
         return loadedGenes;
     }
 }
コード例 #5
0
        /// <summary>
        /// Reads the input file line by line and writes one tab-separated record
        /// (chromosome, position, SV end, id, alt allele, info) to a gzip-compressed
        /// output file for every line that <c>GetSvEndString</c> returns an end for.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the whole input has been processed.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The output name is the input file name with every "vcf" replaced by "bed".
            string bedFileName = Path.GetFileName(_inputFileName).Replace("vcf", "bed");

            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))
            using (var fileStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, bedFileName)))
            using (var gzipStream = new GZipStream(fileStream, CompressionMode.Compress))
            using (var bedWriter = new StreamWriter(gzipStream))
            {
                for (string row = vcfReader.ReadLine(); row != null; row = vcfReader.ReadLine())
                {
                    var columns = row.OptimizedSplit('\t', VcfCommon.InfoIndex + 2);

                    // Lines without an INFO column cannot describe an interval.
                    if (columns.Length <= VcfCommon.InfoIndex)
                    {
                        continue;
                    }

                    string info  = columns[VcfCommon.InfoIndex];
                    string svEnd = GetSvEndString(info);
                    if (svEnd == null)
                    {
                        continue;
                    }

                    // A 1K Genome SV record carries a padding base, so POS + 1 is the 1-based
                    // interval start. BED wants a 0-based start (that value - 1), which means
                    // POS itself can be written unchanged.
                    string bedLine = string.Join('\t', columns[VcfCommon.ChromIndex], columns[VcfCommon.PosIndex], svEnd, columns[VcfCommon.IdIndex], columns[VcfCommon.AltIndex], info);
                    bedWriter.WriteLine(bedLine);
                }
            }

            return ExitCodes.Success;
        }
コード例 #6
0
        /// <summary>
        /// Lazily yields the items that <c>ExtractCustomItems</c> returns for the data
        /// lines of the custom file; lines starting with '#' are passed to
        /// <c>ParseHeaderLine</c> instead.
        /// </summary>
        /// <returns>A lazily evaluated sequence of the extracted items.</returns>
        private IEnumerable<CustomItem> GetCustomItems()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_customFileInfo.FullName))
            {
                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    // Blank rows carry nothing.
                    if (string.IsNullOrWhiteSpace(row))
                    {
                        continue;
                    }

                    // '#' rows belong to the header.
                    if (row.StartsWith("#"))
                    {
                        ParseHeaderLine(row);
                        continue;
                    }

                    var extracted = ExtractCustomItems(row);
                    if (extracted != null)
                    {
                        foreach (var item in extracted)
                        {
                            yield return item;
                        }
                    }
                }
            }
        }
コード例 #7
0
ファイル: ClinGenUnifier.cs プロジェクト: YuJiang01/Nirvana
        /// <summary>
        /// Opens the input file and, when a reference-name file is supplied, loads a
        /// name mapping from it: for every row whose third column equals "YES", the
        /// first column is mapped to the second column.
        /// </summary>
        /// <param name="inputFileInfo">File to open for reading.</param>
        /// <param name="refNameInfo">
        /// Optional tab-separated file with at least three columns per data row;
        /// when null no mapping is loaded.
        /// </param>
        /// <exception cref="InvalidDataException">
        /// Thrown when a non-blank, non-comment row has fewer than three columns.
        /// </exception>
        public ClinGenUnifier(FileInfo inputFileInfo, FileInfo refNameInfo = null)
        {
            if (refNameInfo != null)
            {
                const int requiredColumns = 3;
                var refNames = new Dictionary<string, string>();

                using (var refReader = GZipUtilities.GetAppropriateStreamReader(refNameInfo.FullName))
                {
                    string line;
                    while ((line = refReader.ReadLine()) != null)
                    {
                        // Blank lines (e.g. a trailing newline) and '#' comments hold no mapping.
                        if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#"))
                        {
                            continue;
                        }

                        var lineContents = line.Split('\t');
                        if (lineContents.Length < requiredColumns)
                        {
                            // Fail with a readable message instead of an index-out-of-range error.
                            throw new InvalidDataException(
                                $"Expected at least {requiredColumns} tab-separated columns in {refNameInfo.FullName} but found {lineContents.Length}: {line}");
                        }

                        var ucscName    = lineContents[0];
                        var ensemblName = lineContents[1];
                        var inVep       = lineContents[2].Equals("YES");
                        if (inVep)
                        {
                            refNames[ucscName] = ensemblName;
                        }
                    }
                }

                _refNameDict = refNames;
            }

            // The input reader is opened last so that a failure while loading the
            // mapping cannot leave it open without an owner.
            _reader = GZipUtilities.GetAppropriateStreamReader(inputFileInfo.FullName);
        }
コード例 #8
0
        /// <summary>
        /// Compiles the regular expressions held by this reader and then calls
        /// <c>BuildDumperHierarchy</c> while the file is open.
        /// </summary>
        /// <param name="filename">Path handed to the stream-reader factory.</param>
        public DataDumperReader(string filename)
        {
            // Every pattern is created with the same options.
            const RegexOptions options = RegexOptions.Compiled;

            _binaryKeyValueRegex          = new Regex("'([^']+)' => '\x1f\xfffd\x08", options);
            _dataTypeRegex                = new Regex("}, '([^']+)' \\)", options);
            _digitKeyRegex                = new Regex("^\\s*([\\d\\.]+)(?:,?)\\s*$", options);
            _digitKeyValueRegex           = new Regex("'([^']+)' => (\\d+)", options);
            _emptyListKeyValueRegex       = new Regex("'([^']+)' => \\[\\]", options);
            _emptyValueKeyValueRegex      = new Regex("'([^']+)' => \\{\\}", options);
            _listObjectKeyValueRegex      = new Regex("'([^']+)' => \\[", options);
            _multiLineKeyValueRegex       = new Regex("'([^']+)' => '([^']+)$", options);
            _objectKeyValueRegex          = new Regex("'([^']+)' => (?:bless\\( )?{", options);
            _openBracesRegex              = new Regex("bless\\( \\{", options);
            _referenceStringKeyRegex      = new Regex("^\\s*(\\$VAR\\d+->\\S+?)(?:,?)\\s*$", options);
            _referenceStringKeyValueRegex = new Regex("'([^']+)' => (\\$VAR\\S+)(?:,?)", options);
            _rootObjectKeyValueRegex      = new Regex("\\$VAR\\d = {", options);
            _stringKeyRegex               = new Regex("^\\s*'([^']+)'(?:,?)\\s*$", options);
            _stringKeyValueRegex          = new Regex("'([^']+)' => '([^']*)'", options);
            _undefKeyValueRegex           = new Regex("'([^']+)' => undef", options);

            // The reader is stored in the field for the duration of the build and is
            // disposed as soon as the build finishes.
            using (_reader = GZipUtilities.GetAppropriateStreamReader(filename))
            {
                BuildDumperHierarchy();
            }
        }
コード例 #9
0
 /// <summary>
 /// Lazily yields the items that <c>ExtractItem</c> returns for the data lines
 /// of the dbSNP input. The input is read from <c>_stream</c> when one is set,
 /// otherwise from the file referenced by <c>_dbSnpFile</c>.
 /// </summary>
 /// <returns>A lazily evaluated sequence of the extracted items.</returns>
 private IEnumerable<DbSnpItem> GetDbSnpItems()
 {
     using (var reader = _stream != null ? new StreamReader(_stream) : GZipUtilities.GetAppropriateStreamReader(_dbSnpFile.FullName))
     {
         for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
         {
             // Blank rows and '#' comment rows carry no data.
             if (string.IsNullOrWhiteSpace(row) || row.StartsWith("#"))
             {
                 continue;
             }

             var extracted = ExtractItem(row);
             if (extracted != null && extracted.Count != 0)
             {
                 foreach (var item in extracted)
                 {
                     yield return item;
                 }
             }
         }
     }
 }
コード例 #10
0
        /// <summary>
        /// Walks the <c>ClinVarSet</c> elements of the input XML while <c>_rcvIds</c> is
        /// non-empty. Each element for which <c>DetectRcv</c> returns a value is written
        /// to "&lt;value&gt;.xml" in the output directory. Ids still left in
        /// <c>_rcvIds</c> afterwards are reported on the console.
        /// </summary>
        public void Extract()
        {
            using (var textReader = GZipUtilities.GetAppropriateStreamReader(_inputXmlFile))
            using (var xml = XmlReader.Create(textReader, new XmlReaderSettings { DtdProcessing = DtdProcessing.Prohibit, IgnoreWhitespace = true }))
            {
                bool onClinVarSet = xml.ReadToDescendant("ClinVarSet");

                while (_rcvIds.Count > 0 && onClinVarSet)
                {
                    var setXml     = xml.ReadOuterXml();
                    var matchedRcv = DetectRcv(_rcvIds, setXml);

                    if (matchedRcv != null)
                    {
                        WriteToFile(Path.Combine(_outputDir, matchedRcv + ".xml"), setXml);
                    }

                    // ReadOuterXml has already moved the reader forward; a sibling search
                    // is only needed when it did not land on the next ClinVarSet.
                    if (xml.IsStartElement("ClinVarSet"))
                    {
                        continue;
                    }

                    onClinVarSet = xml.ReadToNextSibling("ClinVarSet");
                }
            }

            if (_rcvIds.Count > 0)
            {
                Console.WriteLine($"Failed to Find {string.Join(',',_rcvIds)}");
            }
        }
コード例 #11
0
ファイル: DgvReader.cs プロジェクト: YuJiang01/Nirvana
        /// <summary>
        /// Lazily yields the items that <c>ExtractDgvItem</c> returns for every line of
        /// the DGV file that is neither blank nor recognised by <c>IsDgvHeader</c>.
        /// </summary>
        /// <returns>A lazily evaluated sequence of the non-null extracted items.</returns>
        private IEnumerable<DgvItem> GetDgvItems()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_dgvFileInfo.FullName))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Blank lines hold no data.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // Header lines hold no data either.
                    if (IsDgvHeader(line))
                    {
                        continue;
                    }

                    var extracted = ExtractDgvItem(line, _renamer);
                    if (extracted != null)
                    {
                        yield return extracted;
                    }
                }
            }
        }
コード例 #12
0
        /// <summary>
        /// Runs <c>ChromMapper</c> over the source and destination map files, then runs
        /// <c>LeftoverMapper</c> over the leftovers file written by the first pass, and
        /// finally disposes the writers returned by the first pass.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> when both passes complete.</returns>
        private static ExitCodes ProgramExecution()
        {
            const string tempLeftoverFilename = "LeftOvers.vcf.gz";
            Dictionary<string, StreamWriter> writers = null;

            ISequenceProvider srcSequenceProvider = ProviderUtilities.GetSequenceProvider(_srcRefSequence);
            ISequenceProvider desSequenceProvider = ProviderUtilities.GetSequenceProvider(_desRefSequence);

            try
            {
                using (var srcReader = GZipUtilities.GetAppropriateStreamReader(_srcMapFile))
                using (var destReader = GZipUtilities.GetAppropriateStreamReader(_destMapFile))
                using (var leftoverWriter = GZipUtilities.GetStreamWriter(tempLeftoverFilename))
                {
                    var chromMapper = new ChromMapper(srcReader, destReader, leftoverWriter, srcSequenceProvider, desSequenceProvider);
                    writers = chromMapper.Map();
                }

                // Second pass: the leftovers file is closed by now and can be read back.
                using (var destReader = GZipUtilities.GetAppropriateStreamReader(_destMapFile))
                using (var leftoverReader = GZipUtilities.GetAppropriateStreamReader(tempLeftoverFilename))
                {
                    var leftOverMapper = new LeftoverMapper(leftoverReader, destReader, writers, desSequenceProvider);
                    var leftoverCount  = leftOverMapper.Map();
                    Console.WriteLine($"{leftoverCount} leftovers mapped!!");
                }
            }
            finally
            {
                // Dispose the writers even when the leftover pass throws, so their
                // files are closed rather than left open.
                if (writers != null)
                {
                    foreach (var writer in writers.Values)
                    {
                        writer.Dispose();
                    }
                }
            }

            return ExitCodes.Success;
        }
コード例 #13
0
ファイル: OmimReader.cs プロジェクト: YuJiang01/Nirvana
        /// <summary>
        /// Lazily yields one <c>OmimAnnotation</c> for every content line of the OMIM
        /// file that has a non-empty gene symbol. Header lines are handed to
        /// <c>ParseHeader</c>.
        /// </summary>
        /// <returns>A lazily evaluated sequence of annotations.</returns>
        private IEnumerable<OmimAnnotation> GetOmimItems()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_omimFileInfo.FullName))
            {
                for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
                {
                    if (IsHeader(row))
                    {
                        ParseHeader(row);
                        continue;
                    }

                    if (!IsContentLine(row))
                    {
                        continue;
                    }

                    // All columns are parsed before the gene-symbol check below, so a
                    // malformed value fails even on a row that would be skipped.
                    var columns     = row.Split('\t');
                    var mimNumber   = Convert.ToInt64(columns[_mimNumberCol]);
                    var geneSymbol  = columns[_hgncCol];
                    var description = columns[_geneDescriptionCol].Replace(@"\\'", @"'");
                    var phenotypes  = ParsePhenotypes(columns[_phenotypeCol].Replace(@",,", @","));

                    if (!string.IsNullOrEmpty(geneSymbol))
                    {
                        yield return new OmimAnnotation(geneSymbol, description, mimNumber, phenotypes);
                    }
                }
            }
        }
コード例 #14
0
 /// <summary>
 /// Checks that the items read from the test DGV file equal the expected sequence.
 /// NOTE(review): despite its name, this test exercises <c>DgvReader</c>.
 /// </summary>
 public void TestDbSnpReader()
 {
     var streamReader = GZipUtilities.GetAppropriateStreamReader(TestDgvFile);

     using (var dgvReader = new DgvReader(streamReader, ChromosomeUtilities.RefNameToChromosome))
     {
         var observedItems = dgvReader.GetItems();
         var expectedItems = CreateTruthDgvItemSequence();

         Assert.True(observedItems.SequenceEqual(expectedItems));
     }
 }
コード例 #15
0
        /// <summary>
        /// Reads the cosm5428243 VCF/TSV pair and checks id, sites and histologies of
        /// every study on the first two items. Items after the second are not checked.
        /// </summary>
        public void TwoStudyCosmicCoding()
        {
            var vcfReader    = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.vcf"));
            var tsvReader    = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.tsv"));
            var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

            var itemIndex = 0;

            foreach (var cosmicItem in cosmicReader.GetCosmicItems())
            {
                // Only the first two items carry expectations; they differ in the sites value.
                if (itemIndex < 2)
                {
                    var expectedSites = itemIndex == 0
                        ? new[] { "haematopoietic_and_lymphoid_tissue" }
                        : new[] { "haematopoietic;lymphoid_tissue" };

                    foreach (var study in cosmicItem.Studies)
                    {
                        Assert.Equal("544", study.Id);
                        Assert.Equal(expectedSites, study.Sites);
                        Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies);
                    }
                }

                itemIndex++;
            }
        }
コード例 #16
0
        /// <summary>
        /// Writes two lines through the gzip stream writer, reads them back through the
        /// stream-reader factory, and checks that the two lines round-trip and that a
        /// third read returns null.
        /// </summary>
        public void GZipReadAndWrite()
        {
            const string expectedLine1 =
                "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.";
            const string expectedLine2 =
                "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.";

            var randomPath = GetRandomPath();

            using (var writer = GZipUtilities.GetStreamWriter(randomPath))
            {
                writer.WriteLine(expectedLine1);
                writer.WriteLine(expectedLine2);
            }

            // One read more than was written, to observe the end of the stream.
            var observedLines = new string[3];

            using (var reader = GZipUtilities.GetAppropriateStreamReader(randomPath))
            {
                for (var i = 0; i < observedLines.Length; i++)
                {
                    observedLines[i] = reader.ReadLine();
                }
            }

            Assert.Equal(expectedLine1, observedLines[0]);
            Assert.Equal(expectedLine2, observedLines[1]);
            Assert.Null(observedLines[2]);
        }
コード例 #17
0
        /// <summary>
        /// Lazily yields the intervals that <c>ExtractCustomInterval</c> returns for
        /// the data lines of the custom file.
        /// </summary>
        /// <returns>A lazily evaluated sequence of the non-null extracted intervals.</returns>
        public IEnumerable<DataStructures.CustomInterval> GetCustomIntervals()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_customFileInfo.FullName))
            {
                while (true)
                {
                    string row = reader.ReadLine();
                    if (row == null)
                    {
                        break;
                    }

                    // Blank rows and '#' rows are skipped.
                    bool isSkippable = string.IsNullOrWhiteSpace(row) || row.StartsWith("#");
                    if (isSkippable)
                    {
                        continue;
                    }

                    var interval = ExtractCustomInterval(row);
                    if (interval != null)
                    {
                        yield return interval;
                    }
                }
            }
        }
コード例 #18
0
        /// <summary>
        /// Copies to <paramref name="outputTsv"/> every line of the input TSV whose
        /// position overlaps one of the regions returned by
        /// <c>GetIntronFlankingRegions</c>, or for which <c>AnyScorePassTheCutoff</c>
        /// returns true. Progress is printed every million lines.
        /// </summary>
        /// <param name="intputTsv">Path of the TSV to read.</param>
        /// <param name="gffFile1">First file passed to <c>GetIntronFlankingRegions</c>.</param>
        /// <param name="gffFile2">Second file passed to <c>GetIntronFlankingRegions</c>.</param>
        /// <param name="outputTsv">Path of the file to write.</param>
        public static void Filter(string intputTsv, string gffFile1, string gffFile2, string outputTsv)
        {
            var flankingRegions = GetIntronFlankingRegions(gffFile1, gffFile2);

            using (var input = GZipUtilities.GetAppropriateStreamReader(intputTsv))
            using (var output = GZipUtilities.GetStreamWriter(outputTsv))
            {
                long lineCount = 0;

                for (string row = input.ReadLine(); row != null; row = input.ReadLine())
                {
                    var    info     = row.TrimEnd().Split('\t');
                    ushort chrIndex = GetChrIndex(info[PredChrColumn]);
                    int    position = int.Parse(info[PredPosColumn]);

                    // The score check only runs when the position is outside every region.
                    bool keepLine = flankingRegions.OverlapsAny(chrIndex, position, position) ||
                                    AnyScorePassTheCutoff(info, PredScoreColumns, FreqCutoff);
                    if (keepLine)
                    {
                        output.WriteLine(row);
                    }

                    lineCount++;
                    if (lineCount % 1_000_000 != 0)
                    {
                        continue;
                    }

                    Console.WriteLine($"Processed {lineCount} lines. Current position: {info[PredChrColumn]}:{info[PredPosColumn]}");
                }
            }
        }
コード例 #19
0
        /// <summary>
        /// Lazily yields the items extracted from the consecutive run of lines that
        /// belong to <paramref name="refName"/>, starting at the offset recorded for
        /// it in <c>_refNameOffsets</c>.
        /// </summary>
        /// <param name="refName">Reference name; data lines are matched on "refName&lt;TAB&gt;".</param>
        /// <returns>
        /// The non-null items for that reference; empty when the reference has no
        /// recorded offset or no matching line is found.
        /// </returns>
        public IEnumerable<InterimSaItem> GetItems(string refName)
        {
            if (!_refNameOffsets.ContainsKey(refName))
            {
                yield break;
            }

            var offset = _refNameOffsets[refName];

            // Hoisted so the prefix is not rebuilt for every line.
            string refNamePrefix = refName + "\t";

            using (var reader = GZipUtilities.GetAppropriateStreamReader(_fileName))
            {
                reader.BaseStream.Position = offset;

                // Find the desired chromosome. This is needed because the GetLocation for
                // GZipStream may return a position a few lines before the start of the
                // chromosome.
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#"))
                    {
                        continue;
                    }

                    if (line.StartsWith(refNamePrefix))
                    {
                        break;
                    }
                }

                // Stops at end of file or at the first line of the next chromosome.
                while (line != null && line.StartsWith(refNamePrefix))
                {
                    var annotationItem = ExtractItem(line);
                    if (annotationItem != null)
                    {
                        yield return annotationItem;
                    }

                    // The next line is always read, including after a line that produced
                    // no item; otherwise the same line would be processed forever.
                    string lastLine = line;
                    try
                    {
                        line = reader.ReadLine();
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("error while reading line in while loop. Last line read:");
                        Console.WriteLine(lastLine);
                        throw;
                    }
                }
            }
        }
コード例 #20
0
ファイル: EnsemblGtf.cs プロジェクト: wangdi2014/Nirvana
        /// <summary>
        /// Loads the genes from the file at <paramref name="filePath"/> and builds an
        /// <c>EnsemblGtf</c> from two lookups keyed by gene id: id to gene, and id to symbol.
        /// </summary>
        /// <param name="filePath">Path handed to the stream-reader factory.</param>
        /// <param name="refNameToChromosome">Dictionary forwarded to <c>LoadEnsemblGenes</c>.</param>
        /// <returns>The newly created <c>EnsemblGtf</c>.</returns>
        public static EnsemblGtf Create(string filePath, IDictionary<string, IChromosome> refNameToChromosome)
        {
            // NOTE(review): the reader is not disposed here; presumably LoadEnsemblGenes
            // takes ownership of it - confirm.
            var gtfReader = GZipUtilities.GetAppropriateStreamReader(filePath);
            var genes     = LoadEnsemblGenes(gtfReader, refNameToChromosome);

            var geneById   = genes.GetSingleValueDict(gene => gene.GeneId);
            var symbolById = genes.GetKeyValueDict(gene => gene.GeneId, gene => gene.Symbol);

            return new EnsemblGtf(geneById, symbolById);
        }
コード例 #21
0
 /// <summary>
 /// Stores the sequence provider and its reference-name dictionary, creates the
 /// empty study lookup, and opens the VCF and TSV files.
 /// </summary>
 /// <param name="vcfFile">Path of the VCF file to open.</param>
 /// <param name="tsvFile">Path of the TSV file to open.</param>
 /// <param name="sequenceProvider">Provider whose <c>RefNameToChromosome</c> is captured.</param>
 public MergedCosmicReader(string vcfFile, string tsvFile, ISequenceProvider sequenceProvider)
 {
     // State that needs no I/O is set up first, so that a failure here (for
     // example a null provider) happens before any file has been opened.
     _sequenceProvider = sequenceProvider;
     _refChromDict     = _sequenceProvider.RefNameToChromosome;
     _studies          = new Dictionary<string, HashSet<CosmicItem.CosmicStudy>>();

     _vcfFileReader = GZipUtilities.GetAppropriateStreamReader(vcfFile);
     try
     {
         _tsvFileReader = GZipUtilities.GetAppropriateStreamReader(tsvFile);
     }
     catch
     {
         // The caller never receives an instance to dispose, so the reader
         // that was already opened is released here.
         _vcfFileReader.Dispose();
         throw;
     }
 }
コード例 #22
0
ファイル: GeneScoresMain.cs プロジェクト: zhouhufeng/Nirvana
        /// <summary>
        /// Builds a <c>GeneScoreTsvCreator</c> over the input file and a
        /// <c>GeneAnnotationTsvWriter</c> for the output directory, then runs it.
        /// </summary>
        /// <returns>The exit code returned by <c>GeneScoreTsvCreator.Create</c>.</returns>
        private static ExitCodes ProgramExecution()
        {
            var inputReader = GZipUtilities.GetAppropriateStreamReader(_inputPath);

            // The version is read from the "<input>.version" file next to the input.
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(_inputPath + ".version");
            var tsvWriter     = new GeneAnnotationTsvWriter(_outputDirectory, sourceVersion, null, 0, JsonKeyName, false);

            var geneScoreCreator = new GeneScoreTsvCreator(inputReader, tsvWriter);
            return geneScoreCreator.Create();
        }
コード例 #23
0
ファイル: PhylopWriter.cs プロジェクト: YuJiang01/Nirvana
 /// <summary>
 /// Chains to the four-argument constructor (passing null as its first argument),
 /// then records the output directory and version and opens the wigFix input file.
 /// </summary>
 /// <param name="inputWigFixFile">Path handed to the stream-reader factory.</param>
 /// <param name="version">Data source version; forwarded and stored.</param>
 /// <param name="genomeAssembly">Genome assembly; forwarded to the chained constructor.</param>
 /// <param name="outputNirvanaDirectory">Output directory to remember.</param>
 /// <param name="intervalLength">Interval length; forwarded to the chained constructor.</param>
 public PhylopWriter(string inputWigFixFile, DataSourceVersion version, GenomeAssembly genomeAssembly,
                     string outputNirvanaDirectory, int intervalLength = PhylopCommon.MaxIntervalLength)
     : this(null, version, genomeAssembly, intervalLength)
 {
     _outputNirvanaDirectory = outputNirvanaDirectory;
     _version                = version;

     var wigFixReader = GZipUtilities.GetAppropriateStreamReader(inputWigFixFile);
     _reader = wigFixReader;
 }
コード例 #24
0
                        Accession37, IDictionary <string, IChromosome> Accession38) GetSequenceDictionaries(string referencePath,
                                                                                                            string assemblyInfo37Path, string assemblyInfo38Path)
        {
            var(_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(referencePath);
            var accession37Dict = AssemblyReader.GetAccessionToChromosome(GZipUtilities.GetAppropriateStreamReader(assemblyInfo37Path), refNameToChromosome);
            var accession38Dict = AssemblyReader.GetAccessionToChromosome(GZipUtilities.GetAppropriateStreamReader(assemblyInfo38Path), refNameToChromosome);

            return(refNameToChromosome, accession37Dict, accession38Dict);
        }
コード例 #25
0
        /// <summary>
        /// Checks that the COSM18152 VCF/TSV pair yields exactly one item.
        /// </summary>
        public void CosmicAlleleSpecificIndel()
        {
            var vcfStream = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.vcf"));
            var tsvStream = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.tsv"));

            var mergedReader = new MergedCosmicReader(vcfStream, tsvStream, _refChromDict);

            Assert.Single(mergedReader.GetCosmicItems());
        }
コード例 #26
0
ファイル: ConflictRemover.cs プロジェクト: YuJiang01/Nirvana
        /// <summary>
        /// Copies the input VCF to the output file. Header lines are written through
        /// directly. Data lines are buffered and handed to <c>FlushVcfLineBuffer</c>,
        /// together with a per-allele flag recording whether that allele id was seen
        /// more than once in the current buffer.
        /// </summary>
        /// <returns>The value of <c>_noLinesRemoved</c> after processing.</returns>
        public int RemoveConflictingLines()
        {
            using (var reader = GZipUtilities.GetAppropriateStreamReader(_inFile))
            using (var writer = GZipUtilities.GetStreamWriter(_outFile))
            {
                // Lines buffered since the last flush.
                var bufferedLines = new List<string>(VcfBufferSize);
                // Allele id -> true once the same id has been seen a second time.
                var conflictByAllele = new Dictionary<string, bool>();

                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    if (line.StartsWith("#"))
                    {
                        // Header lines are streamed straight to the output.
                        writer.WriteLine(line);
                        continue;
                    }

                    var columns    = line.Split(new[] { '\t' }, VcfCommon.InfoIndex + 1);
                    var chromosome = columns[VcfCommon.ChromIndex];
                    var position   = Convert.ToInt32(columns[VcfCommon.PosIndex]);
                    var refAllele  = columns[VcfCommon.RefIndex];
                    var altAlleles = columns[VcfCommon.AltIndex].Split(',');

                    // NOTE(review): _maxVidPosition is not updated in this method;
                    // presumably GetAlleleId maintains it - confirm.
                    bool startsNewBuffer = chromosome != _currentRefSeq || position > _maxVidPosition;
                    if (startsNewBuffer)
                    {
                        FlushVcfLineBuffer(bufferedLines, conflictByAllele, writer);
                        bufferedLines.Clear();
                        conflictByAllele.Clear();

                        _currentRefSeq = chromosome;
                    }

                    foreach (var altAllele in altAlleles)
                    {
                        var alleleId = GetAlleleId(chromosome, position, refAllele, altAllele);

                        // First sighting stores false; any repeat flips the flag to true.
                        conflictByAllele[alleleId] = conflictByAllele.ContainsKey(alleleId);
                    }

                    bufferedLines.Add(line);
                }

                // Whatever is still buffered at end of file is flushed as well.
                FlushVcfLineBuffer(bufferedLines, conflictByAllele, writer);
            }

            return _noLinesRemoved;
        }
コード例 #27
0
        /// <summary>
        /// Checks that the COSM983708 VCF/TSV pair yields exactly one item and that
        /// its JSON string contains a reference allele of "C".
        /// </summary>
        public void CosmicAltAllele()
        {
            var vcfStream = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.vcf"));
            var tsvStream = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.tsv"));

            var mergedReader = new MergedCosmicReader(vcfStream, tsvStream, _refChromDict);
            var cosmicItems  = mergedReader.GetCosmicItems().ToList();

            Assert.Single(cosmicItems);

            var json = cosmicItems[0].GetJsonString();
            Assert.Contains("\"refAllele\":\"C\"", json);
        }
コード例 #28
0
        /// <summary>
        /// Opens the file named by <c>_fileName</c> and returns the header that
        /// <c>ReadHeader</c> reads from it.
        /// </summary>
        /// <returns>The header returned by <c>ReadHeader</c>.</returns>
        private SaHeader GetHeader()
        {
            using (var headerReader = GZipUtilities.GetAppropriateStreamReader(_fileName))
            {
                // The reader is disposed as soon as the header has been read.
                return ReadHeader(headerReader);
            }
        }
コード例 #29
0
        /// <summary>
        /// Wires a <c>TopMedTsvCreator</c> to the input file, the compressed reference
        /// and the "&lt;input&gt;.version" data-source version, then calls
        /// <c>CreateTsvs</c>.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once <c>CreateTsvs</c> returns.</returns>
        private ExitCodes ProgramExecution()
        {
            var inputReader      = GZipUtilities.GetAppropriateStreamReader(_inputFileArg);
            var referenceStream  = FileUtilities.GetReadStream(_compressedReferenceArg);
            var sequenceProvider = new ReferenceSequenceProvider(referenceStream);
            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFileArg + ".version");

            new TopMedTsvCreator(inputReader, sequenceProvider, sourceVersion, _outputDirArg).CreateTsvs();

            return ExitCodes.Success;
        }
コード例 #30
0
        /// <summary>
        /// Opens the file at <paramref name="filePath"/> and passes the reader to
        /// <c>VepReaderCommon.GetHeader</c>.
        /// </summary>
        /// <param name="filePath">Path of the file to read; it must exist.</param>
        /// <param name="description">Description forwarded to <c>VepReaderCommon.GetHeader</c>.</param>
        /// <param name="fileType">File type forwarded to <c>VepReaderCommon.GetHeader</c>.</param>
        /// <exception cref="FileNotFoundException">
        /// Thrown when <paramref name="filePath"/> does not exist.
        /// </exception>
        public VepSimpleIntervalReader(string filePath, string description, GlobalImportCommon.FileType fileType)
        {
            // sanity check
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException($"The specified intron file ({filePath}) does not exist.");
            }

            _reader = GZipUtilities.GetAppropriateStreamReader(filePath);

            try
            {
                VepReaderCommon.GetHeader(description, filePath, fileType, _reader);
            }
            catch
            {
                // A constructor that throws leaves the caller nothing to dispose,
                // so the reader is released here before the error propagates.
                _reader.Dispose();
                throw;
            }
        }