Example #1
0
        /// <summary>
        /// Reads the "1000G_SVs.tsv" resource through <c>OneKGenSvReader</c> and checks the
        /// supplementary intervals produced for the first three items.
        /// </summary>
        public void OnekGenSvReader()
        {
            var svFile = new FileInfo(Resources.InputFiles("1000G_SVs.tsv"));
            var items  = new OneKGenSvReader(svFile, _renamer).ToList();

            // first item: expected to be a copy number gain
            var gainInterval = items[0].GetSupplementaryInterval(_renamer);

            Assert.Equal("esv3584976", gainInterval.StringValues["id"]);
            Assert.Equal(668631, gainInterval.Start);
            Assert.Equal(850204, gainInterval.End);
            Assert.Equal(VariantType.copy_number_gain, gainInterval.VariantType);
            Assert.Equal(0.02396, gainInterval.PopulationFrequencies["variantFreqAll"]);
            Assert.Equal(2504, gainInterval.IntValues["sampleSize"]);

            // second item: expected to be a copy number variant (both loss and gain)
            var cnvInterval = items[1].GetSupplementaryInterval(_renamer);

            Assert.Equal("esv3584977;esv3584978", cnvInterval.StringValues["id"]);
            Assert.Equal(713045, cnvInterval.Start);
            Assert.Equal(755966, cnvInterval.End);
            Assert.Equal(VariantType.copy_number_variation, cnvInterval.VariantType);

            // third item: expected to be a deletion (copy_number_loss)
            var lossInterval = items[2].GetSupplementaryInterval(_renamer);

            Assert.Equal("esv3584979", lossInterval.StringValues["id"]);
            Assert.Equal(738571, lossInterval.Start);
            Assert.Equal(742020, lossInterval.End);
            Assert.Equal(VariantType.copy_number_loss, lossInterval.VariantType);
        }
        /// <summary>
        /// Creates the interval TSV for one structural variant data source and reports the
        /// elapsed time when done.
        /// </summary>
        /// <param name="sourceName">
        /// source tag; compared against <c>InterimSaCommon.DgvTag</c>, <c>InterimSaCommon.ClinGenTag</c>
        /// and <c>InterimSaCommon.OnekSvTag</c>
        /// </param>
        /// <param name="fileName">path of the input file; the method does nothing when it is null or empty</param>
        private void CreateSvTsv(string sourceName, string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var timer         = new Benchmark();
            var sourceLabel   = "";
            var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);

            if (sourceName == InterimSaCommon.DgvTag)
            {
                sourceLabel = "DGV";
                using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion,
                                                             _genomeAssembly.ToString(), SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag,
                                                             ReportFor.StructuralVariants))
                {
                    var dgvReader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(dgvReader.GetDgvItems(), tsvWriter);
                }
            }
            else if (sourceName == InterimSaCommon.ClinGenTag)
            {
                sourceLabel = "ClinGen";
                using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion,
                                                             _genomeAssembly.ToString(), SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag,
                                                             ReportFor.StructuralVariants))
                {
                    var clinGenReader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(clinGenReader.GetClinGenItems(), tsvWriter);
                }
            }
            else if (sourceName == InterimSaCommon.OnekSvTag)
            {
                sourceLabel = "OnekSv";
                using (var tsvWriter = new IntervalTsvWriter(_outputDirectory, sourceVersion,
                                                             _genomeAssembly.ToString(), SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag,
                                                             ReportFor.StructuralVariants))
                {
                    var oneKSvReader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(oneKSvReader.GetOneKGenSvItems(), tsvWriter);
                }
            }
            else
            {
                // NOTE(review): execution continues to the completion report below with an empty
                // source label even though nothing was written - confirm this is intended.
                Console.WriteLine("invalid source name");
            }

            var elapsed = Benchmark.ToHumanReadable(timer.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo(sourceLabel, sourceVersion.Version, elapsed);
        }
Example #3
0
        /// <summary>
        /// Reads the items of the input file with <c>OneKGenSvReader</c> and writes them with an
        /// <c>NsiWriter</c> to a file in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the writer has finished.</returns>
        private static ExitCodes ProgramExecution()
        {
            var sequenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");

            // output base name is "<name>_<version>" with spaces replaced by underscores
            string baseName = $"{sourceVersion.Name}_{sourceVersion.Version}".Replace(' ', '_');

            // stacked usings: resources are released in reverse order of acquisition
            using (var inputReader = GZipUtilities.GetAppropriateStreamReader(_inputFileName))
            using (var svReader = new OneKGenSvReader(inputReader, sequenceProvider.RefNameToChromosome))
            using (var outputStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, baseName + SaCommon.IntervalFileSuffix)))
            using (var intervalWriter = new NsiWriter(outputStream, sourceVersion, sequenceProvider.Assembly,
                                                      SaCommon.OnekSvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                intervalWriter.Write(svReader.GetItems());
            }

            return ExitCodes.Success;
        }
Example #4
0
        /// <summary>
        /// Reads the stream returned by <c>GetOneKgSvStream</c> through <c>OneKGenSvReader</c> and
        /// checks that exactly four items come back with the expected JSON strings, in order.
        /// </summary>
        public void OnekGenSvReader()
        {
            // expected JSON for each item, in the order the reader is expected to return them
            string[] expectedJson =
            {
                "\"chromosome\":\"1\",\"begin\":668631,\"end\":850204,\"variantType\":\"copy_number_gain\",\"id\":\"esv3584976\",\"allAn\":5008,\"allAc\":64,\"allAf\":0.01278,\"afrAf\":0.0015,\"amrAf\":0,\"eurAf\":0.001,\"easAf\":0.0595,\"sasAf\":0.001",
                "\"chromosome\":\"1\",\"begin\":713045,\"end\":755966,\"variantType\":\"copy_number_variation\",\"id\":\"esv3584977;esv3584978\",\"allAn\":5008,\"allAc\":209,\"allAf\":0.041733,\"afrAf\":0.0303,\"amrAf\":0.0273,\"eurAf\":0.0427,\"easAf\":0.0625,\"sasAf\":0.045",
                "\"chromosome\":\"1\",\"begin\":738571,\"end\":742020,\"variantType\":\"copy_number_loss\",\"id\":\"esv3584979\",\"allAn\":5008,\"allAc\":1,\"allAf\":0.0002,\"afrAf\":0,\"amrAf\":0,\"eurAf\":0,\"easAf\":0.001,\"sasAf\":0",
                "\"chromosome\":\"1\",\"begin\":645711,\"end\":645932,\"variantType\":\"mobile_element_insertion\",\"id\":\"esv3584975\",\"allAn\":5008,\"allAc\":35,\"allAf\":0.006989,\"afrAf\":0,\"amrAf\":0.0072,\"eurAf\":0.0189,\"easAf\":0.0069,\"sasAf\":0.0041"
            };

            using (var textReader = new StreamReader(GetOneKgSvStream()))
            {
                var items = new OneKGenSvReader(textReader, _refChromDict).GetItems().ToList();

                Assert.Equal(4, items.Count);

                for (var i = 0; i < expectedJson.Length; i++)
                {
                    Assert.Equal(expectedJson[i], items[i].GetJsonString());
                }
            }
        }
Example #5
0
        /// <summary>
        /// Reads the stream returned by <c>GetOneKgSvStream</c> through <c>OneKGenSvReader</c>, using
        /// <c>ChromosomeUtilities.RefNameToChromosome</c> for chromosome lookup, and checks the
        /// JSON string of each of the four returned items.
        /// </summary>
        public void OnekGenSvReader()
        {
            const string expectedFirstGain  = "\"chromosome\":\"1\",\"begin\":668631,\"end\":850204,\"variantType\":\"copy_number_gain\",\"id\":\"esv3584976\",\"allAn\":5008,\"allAc\":64,\"allAf\":0.01278,\"afrAf\":0.0015,\"amrAf\":0,\"eurAf\":0.001,\"easAf\":0.0595,\"sasAf\":0.001";
            const string expectedVariation  = "\"chromosome\":\"1\",\"begin\":713045,\"end\":755966,\"variantType\":\"copy_number_variation\",\"id\":\"esv3584977;esv3584978\",\"allAn\":5008,\"allAc\":209,\"allAf\":0.041733,\"afrAf\":0.0303,\"amrAf\":0.0273,\"eurAf\":0.0427,\"easAf\":0.0625,\"sasAf\":0.045";
            const string expectedLoss       = "\"chromosome\":\"1\",\"begin\":738571,\"end\":742020,\"variantType\":\"copy_number_loss\",\"id\":\"esv3584979\",\"allAn\":5008,\"allAc\":1,\"allAf\":0.0002,\"afrAf\":0,\"amrAf\":0,\"eurAf\":0,\"easAf\":0.001,\"sasAf\":0";
            const string expectedSecondGain = "\"chromosome\":\"1\",\"begin\":2397656,\"end\":2401469,\"variantType\":\"copy_number_gain\",\"id\":\"esv3585028\",\"allAn\":5008,\"allAc\":96,\"allAf\":0.019169,\"afrAf\":0.0287,\"amrAf\":0.0216,\"eurAf\":0.0119,\"easAf\":0.0248,\"sasAf\":0.0061";

            using (var textReader = new StreamReader(GetOneKgSvStream()))
            {
                var oneKSvReader = new OneKGenSvReader(textReader, ChromosomeUtilities.RefNameToChromosome);
                var items        = oneKSvReader.GetItems().ToList();

                Assert.Equal(4, items.Count);

                Assert.Equal(expectedFirstGain, items[0].GetJsonString());
                Assert.Equal(expectedVariation, items[1].GetJsonString());
                Assert.Equal(expectedLoss, items[2].GetJsonString());
                Assert.Equal(expectedSecondGain, items[3].GetJsonString());
            }
        }
Example #6
0
        /// <summary>
        /// Sets up the supplementary database builder: loads the compressed reference, optionally
        /// restricts processing to a chromosome white list, and registers one item enumerator for
        /// every data source file that was supplied. Each enumerator is advanced to its first item;
        /// enumerators that have no items are dropped.
        /// </summary>
        /// <param name="compressedReferencePath">path of the compressed reference sequence file</param>
        /// <param name="nsdBaseFileName">base file name, stored in <c>_nsdBaseFileName</c></param>
        /// <param name="dbSnpFileName">dbSNP input file, or null to skip</param>
        /// <param name="cosmicVcfFile">COSMIC VCF file; COSMIC is only read when this and <paramref name="cosmicTsvFile"/> are both given</param>
        /// <param name="cosmicTsvFile">COSMIC TSV file; COSMIC is only read when this and <paramref name="cosmicVcfFile"/> are both given</param>
        /// <param name="clinVarFileName">ClinVar XML file, or null to skip; its items are fully loaded and sorted</param>
        /// <param name="oneKGenomeAfFileName">1000 Genomes allele frequency file, or null to skip</param>
        /// <param name="evsFileName">EVS input file, or null to skip</param>
        /// <param name="exacFileName">ExAC input file, or null to skip</param>
        /// <param name="customFiles">custom annotation files, or null to skip</param>
        /// <param name="dgvFileName">DGV input file, or null to skip</param>
        /// <param name="oneKSvFileName">1000 Genomes structural variant file, or null to skip</param>
        /// <param name="clinGenFileName">ClinGen input file, or null to skip</param>
        /// <param name="chrWhiteList">comma-separated chromosome names to restrict to; null or empty means no restriction</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    // NOTE(review): the else branch below sets ChromosomeWhiteList to null; if that
                    // member is shared between instances, this Add could hit a null - confirm.
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both files; supplying only one of them is silently ignored
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the source version is only added here when the allele frequency file did not
                // already add one above
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are materialized and sorted before being enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                // NOTE(review): "form" in the message below looks like a typo for "from"; left
                // unchanged because it is runtime output.
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Advance every enumerator to its first item and drop the ones that have none.
            // RemoveAll is used because removing from the list inside a foreach over that same
            // list throws InvalidOperationException; the predicate still calls MoveNext exactly
            // once per enumerator, in list order.
            _iSupplementaryDataItemList.RemoveAll(dataEnumerator => !dataEnumerator.MoveNext());

            _additionalItemsList = new List<SupplementaryDataItem>();
        }