Пример #1
0
 public void TestDbSnpReader()
 {
     // Parse the DGV test file and compare the parsed items with the expected sequence.
     var dgvStream = GZipUtilities.GetAppropriateStreamReader(TestDgvFile);

     using (var reader = new DgvReader(dgvStream, ChromosomeUtilities.RefNameToChromosome))
     {
         var parsedItems   = reader.GetItems();
         var expectedItems = CreateTruthDgvItemSequence();

         Assert.True(parsedItems.SequenceEqual(expectedItems));
     }
 }
Пример #2
0
        public void ExtractDgvComplex()
        {
            // Tab-separated DGV line of subtype "complex" with both gains and losses reported.
            const string dgvLine = "esv2421662	1	12841928	12971833	OTHER	complex	Altshuler_et_al_2010	20811451	SNP array			essv5038349,essv5012238	M		1184	20	70		HNRNPCL1,LOC649330,PRAMEF1,PRAMEF10,PRAMEF11,PRAMEF2,PRAMEF4	NA10838,NA10847";

            // JSON fragment the extracted item is expected to serialise to.
            const string expectedJson = "\"chromosome\":\"1\",\"begin\":12841928,\"end\":12971833,\"variantType\":\"complex_structural_alteration\",\"id\":\"esv2421662\",\"sampleSize\":1184,\"observedGains\":20,\"observedLosses\":70,\"variantFreqAll\":0.07601";

            var actualJson = DgvReader
                .ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome)
                .GetJsonString();

            Assert.Equal(expectedJson, actualJson);
        }
Пример #3
0
        public void ExtractDgvCnv()
        {
            // Tab-separated DGV line of type CNV / subtype "loss" with 0 gains and 1 loss.
            const string dgvLine = "nsv482937	1	1	2300000	CNV	loss	Iafrate_et_al_2004	15286789	BAC aCGH,FISH			nssv2995976	M		39	0	1		ACAP3,AGRN,WASH7P	";

            // The expected JSON carries no "observedGains" entry for this record.
            const string expectedJson = "\"chromosome\":\"1\",\"begin\":1,\"end\":2300000,\"variantType\":\"copy_number_loss\",\"id\":\"nsv482937\",\"sampleSize\":39,\"observedLosses\":1,\"variantFreqAll\":0.02564";

            var actualJson = DgvReader
                .ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome)
                .GetJsonString();

            Assert.Equal(expectedJson, actualJson);
        }
Пример #4
0
        /// <summary>
        /// Creates an interval TSV file for one structural-variant data source and
        /// reports the elapsed time once the file has been written.
        /// </summary>
        /// <param name="sourceName">
        /// Tag selecting the data source; matched against <c>InterimSaCommon.DgvTag</c>,
        /// <c>InterimSaCommon.ClinGenTag</c> and <c>InterimSaCommon.OnekSvTag</c>.
        /// </param>
        /// <param name="fileName">Input file path; the method does nothing when it is null or empty.</param>
        private void CreateSvTsv(string sourceName, string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            var benchMark = new Benchmark();
            //Console.WriteLine($"Creating TSV from {fileName}");
            var dataSource = "";
            var version    = DataSourceVersionReader.GetSourceVersion(fileName);

            switch (sourceName)
            {
            case InterimSaCommon.DgvTag:
                dataSource = "DGV";
                using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                          _genomeAssembly.ToString(), SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag, ReportFor.StructuralVariants))
                {
                    var reader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(reader.GetDgvItems(), writer);
                }
                break;

            case InterimSaCommon.ClinGenTag:
                dataSource = "ClinGen";
                using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                          _genomeAssembly.ToString(), SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag,
                                                          ReportFor.StructuralVariants))
                {
                    var reader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(reader.GetClinGenItems(), writer);
                }

                break;

            case InterimSaCommon.OnekSvTag:
                dataSource = "OnekSv";
                using (var writer = new IntervalTsvWriter(_outputDirectory, version,
                                                          _genomeAssembly.ToString(), SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag,
                                                          ReportFor.StructuralVariants))
                {
                    var reader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                    CreateSvTsv(reader.GetOneKGenSvItems(), writer);
                }

                break;

            default:
                // Nothing was written for an unrecognised tag, so leave without
                // reporting a completed (and nameless) data source below.
                Console.WriteLine("invalid source name");
                return;
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo(dataSource, version.Version, timeSpan);
        }
Пример #5
0
        /// <summary>
        /// Reads the DGV items from the input file and writes them through an
        /// <c>NsiWriter</c> to a file in the output directory.
        /// </summary>
        /// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
        private static ExitCodes ProgramExecution()
        {
            var referenceStream   = FileUtilities.GetReadStream(_compressedReference);
            var referenceProvider = new ReferenceSequenceProvider(referenceStream);

            // The data source version is read from the input path with ".version" appended.
            var version        = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");
            var outputBaseName = $"{version.Name}_{version.Version}";

            using (var dgvReader = new DgvReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), referenceProvider.RefNameToChromosome))
            {
                var outputPath = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SiFileSuffix);

                using (var nsaStream = FileUtilities.GetCreateStream(outputPath))
                using (var nsiWriter = new NsiWriter(new ExtendedBinaryWriter(nsaStream), version, referenceProvider.Assembly, SaCommon.DgvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
                {
                    nsiWriter.Write(dgvReader.GetItems());
                }
            }

            return ExitCodes.Success;
        }
Пример #6
0
        public void EmptyObservedLossesAndGains()
        {
            // Tab-separated DGV line whose fields after the sample size (24) are all empty.
            const string dgvLine = "nsv161172	1	88190	89153	CNV	deletion	Mills_et_al_2006	16902084	Sequencing			nssv179750	M		24					";

            // The expected JSON ends at "sampleSize": no gains, losses or frequency entries.
            const string expectedJson = "\"chromosome\":\"1\",\"begin\":88190,\"end\":89153,\"variantType\":\"copy_number_loss\",\"id\":\"nsv161172\",\"sampleSize\":24";

            var actualJson = DgvReader
                .ExtractDgvItem(dgvLine, ChromosomeUtilities.RefNameToChromosome)
                .GetJsonString();

            Assert.Equal(expectedJson, actualJson);
        }
Пример #7
0
        public void ExtractDgvComplex()
        {
            // Tab-separated DGV line of subtype "complex" with both gains and losses reported.
            const string dgvLine = "esv2421662	1	12841928	12971833	OTHER	complex	Altshuler_et_al_2010	20811451	SNP array			essv5038349,essv5012238	M		1184	20	70		HNRNPCL1,LOC649330,PRAMEF1,PRAMEF10,PRAMEF11,PRAMEF2,PRAMEF4	NA10838,NA10847";

            var item = DgvReader.ExtractDgvItem(dgvLine, _renamer);
            Assert.True(item.IsInterval);

            var interval = item.GetSupplementaryInterval(_renamer);

            // position, classification and identifier
            Assert.Equal(12841928, interval.Start);
            Assert.Equal(12971833, interval.End);
            Assert.Equal("complex_structural_alteration", interval.VariantType.ToString());
            Assert.Equal("DGV", interval.Source);
            Assert.Equal("esv2421662", interval.StringValues["id"]);

            // frequency is compared after formatting to at most five decimals
            var formattedFrequency = interval.PopulationFrequencies["variantFreqAll"].ToString("0.#####");
            Assert.Equal("0.07601", formattedFrequency);

            // sample counts
            Assert.Equal(1184, interval.IntValues["sampleSize"]);
            Assert.Equal(70, interval.IntValues["observedLosses"]);
            Assert.Equal(20, interval.IntValues["observedGains"]);
        }
Пример #8
0
        public void ExtractDgvCnv()
        {
            // Tab-separated DGV line of type CNV / subtype "loss" with 0 gains and 1 loss.
            const string dgvLine = "nsv482937	1	1	2300000	CNV	loss	Iafrate_et_al_2004	15286789	BAC aCGH,FISH			nssv2995976	M		39	0	1		ACAP3,AGRN,WASH7P	";

            var item = DgvReader.ExtractDgvItem(dgvLine, _renamer);
            Assert.True(item.IsInterval);

            var interval = item.GetSupplementaryInterval(_renamer);

            // position, classification and identifier
            Assert.Equal(1, interval.Start);
            Assert.Equal(2300000, interval.End);
            Assert.Equal("copy_number_loss", interval.VariantType.ToString());
            Assert.Equal("DGV", interval.Source);
            Assert.Equal("nsv482937", interval.StringValues["id"]);

            // frequency is compared after formatting to at most five decimals
            var formattedFrequency = interval.PopulationFrequencies["variantFreqAll"].ToString("0.#####");
            Assert.Equal("0.02564", formattedFrequency);

            // sample counts: the zero gain count must not be stored
            Assert.Equal(39, interval.IntValues["sampleSize"]);
            Assert.Equal(1, interval.IntValues["observedLosses"]);
            Assert.False(interval.IntValues.ContainsKey("observedGains"));
        }
Пример #9
0
        public void EmptyObservedLossesAndGains()
        {
            // Tab-separated DGV line whose fields after the sample size (24) are all empty.
            const string dgvLine = "nsv161172	1	88190	89153	CNV	deletion	Mills_et_al_2006	16902084	Sequencing			nssv179750	M		24					";

            var item = DgvReader.ExtractDgvItem(dgvLine, _renamer);
            Assert.True(item.IsInterval);

            var interval = item.GetSupplementaryInterval(_renamer);

            // position, classification and identifier
            Assert.Equal("1", interval.ReferenceName);
            Assert.Equal(88190, interval.Start);
            Assert.Equal(89153, interval.End);
            Assert.Equal("copy_number_loss", interval.VariantType.ToString());
            Assert.Equal("DGV", interval.Source);
            Assert.Equal("nsv161172", interval.StringValues["id"]);

            // only the sample size is present; gains, losses and frequency must be absent
            Assert.Equal(24, interval.IntValues["sampleSize"]);
            Assert.False(interval.IntValues.ContainsKey("observedGains"));
            Assert.False(interval.IntValues.ContainsKey("observedLosses"));
            Assert.False(interval.PopulationFrequencies.ContainsKey("variantFreqAll"));
        }
Пример #10
0
        public void TestDbSnpReader()
        {
            // The reader is enumerated directly and compared with the expected item sequence.
            var reader        = new DgvReader(TestDgvFile, _renamer);
            var expectedItems = CreateTruthDgvItemSequence();

            Assert.True(reader.SequenceEqual(expectedItems));
        }
Пример #11
0
        public void TestDbSnpReader()
        {
            // Items returned by GetDgvItems() must match the expected item sequence.
            var reader        = new DgvReader(TestDgvFile, RefChromDict);
            var parsedItems   = reader.GetDgvItems();
            var expectedItems = CreateTruthDgvItemSequence();

            Assert.True(parsedItems.SequenceEqual(expectedItems));
        }
Пример #12
0
        /// <summary>
        /// Sets up the readers for every supplied data source and positions each
        /// resulting enumerator on its first item. Sources whose file name is null
        /// are skipped; sources that yield no items are dropped from the list.
        /// </summary>
        /// <param name="compressedReferencePath">Path of the compressed reference that is opened for reading.</param>
        /// <param name="nsdBaseFileName">Base file name stored for later use by this instance.</param>
        /// <param name="dbSnpFileName">Optional dbSNP input file.</param>
        /// <param name="cosmicVcfFile">Optional COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given.</param>
        /// <param name="cosmicTsvFile">Optional COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given.</param>
        /// <param name="clinVarFileName">Optional ClinVar XML file.</param>
        /// <param name="oneKGenomeAfFileName">Optional input file passed to <c>OneKGenReader</c>.</param>
        /// <param name="evsFileName">Optional EVS input file.</param>
        /// <param name="exacFileName">Optional ExAC input file.</param>
        /// <param name="customFiles">Optional list of custom annotation files.</param>
        /// <param name="dgvFileName">Optional DGV input file.</param>
        /// <param name="oneKSvFileName">Optional input file passed to <c>OneKGenSvReader</c>.</param>
        /// <param name="clinGenFileName">Optional ClinGen input file.</param>
        /// <param name="chrWhiteList">Optional comma-separated list of reference names to restrict processing to.</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List <string> customFiles   = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List <DataSourceVersion>();

            _iSupplementaryDataItemList = new List <IEnumerator <SupplementaryDataItem> >();
            _supplementaryIntervalList  = new List <SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            // an empty or missing white list clears the shared white list
            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both files; the source version is taken from the VCF file
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the SV file's version is only registered when no oneKGenomeAfFileName was given
                // NOTE(review): presumably both files share one data source version - confirm
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are read into a list and sorted before they are enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator <ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Advance every enumerator to its first item and drop the ones that have none.
            // Removing inside a foreach over the same list throws InvalidOperationException
            // on the next iteration, so the list is walked by index; the index only moves
            // forward when the current entry is kept.
            var enumeratorIndex = 0;
            while (enumeratorIndex < _iSupplementaryDataItemList.Count)
            {
                if (_iSupplementaryDataItemList[enumeratorIndex].MoveNext())
                {
                    enumeratorIndex++;
                }
                else
                {
                    _iSupplementaryDataItemList.RemoveAt(enumeratorIndex);
                }
            }

            _additionalItemsList = new List <SupplementaryDataItem>();
        }