/// <summary>
/// Despite its name, this test exercises <c>DgvReader</c>: it opens <c>TestDgvFile</c> through
/// <c>GZipUtilities.GetAppropriateStreamReader</c> and checks that the items produced by the reader
/// are sequence-equal to the items returned by <c>CreateTruthDgvItemSequence</c>.
/// </summary>
public void TestDbSnpReader()
{
    var streamReader = GZipUtilities.GetAppropriateStreamReader(TestDgvFile);

    using (var reader = new DgvReader(streamReader, ChromosomeUtilities.RefNameToChromosome))
    {
        // the observed sequence is requested before the truth set, as in the original expression
        var observedItems = reader.GetItems();
        var expectedItems = CreateTruthDgvItemSequence();

        Assert.True(observedItems.SequenceEqual(expectedItems));
    }
}
/// <summary>
/// Parses a single DGV line describing a "complex" variant and checks the JSON string
/// produced by the resulting item.
/// </summary>
public void ExtractDgvComplex()
{
    const string inputLine = "esv2421662 1 12841928 12971833 OTHER complex Altshuler_et_al_2010 20811451 SNP array essv5038349,essv5012238 M 1184 20 70 HNRNPCL1,LOC649330,PRAMEF1,PRAMEF10,PRAMEF11,PRAMEF2,PRAMEF4 NA10838,NA10847";
    const string expectedJson = "\"chromosome\":\"1\",\"begin\":12841928,\"end\":12971833,\"variantType\":\"complex_structural_alteration\",\"id\":\"esv2421662\",\"sampleSize\":1184,\"observedGains\":20,\"observedLosses\":70,\"variantFreqAll\":0.07601";

    var item = DgvReader.ExtractDgvItem(inputLine, ChromosomeUtilities.RefNameToChromosome);
    var observedJson = item.GetJsonString();

    Assert.Equal(expectedJson, observedJson);
}
/// <summary>
/// Parses a single DGV line describing a CNV "loss" and checks the JSON string
/// produced by the resulting item (the expected JSON carries no observedGains entry).
/// </summary>
public void ExtractDgvCnv()
{
    const string inputLine = "nsv482937 1 1 2300000 CNV loss Iafrate_et_al_2004 15286789 BAC aCGH,FISH nssv2995976 M 39 0 1 ACAP3,AGRN,WASH7P ";
    const string expectedJson = "\"chromosome\":\"1\",\"begin\":1,\"end\":2300000,\"variantType\":\"copy_number_loss\",\"id\":\"nsv482937\",\"sampleSize\":39,\"observedLosses\":1,\"variantFreqAll\":0.02564";

    var item = DgvReader.ExtractDgvItem(inputLine, ChromosomeUtilities.RefNameToChromosome);
    var observedJson = item.GetJsonString();

    Assert.Equal(expectedJson, observedJson);
}
/// <summary>
/// Creates an interval TSV for one structural-variant data source (DGV, ClinGen or OnekSv)
/// and reports the elapsed time once the file has been written.
/// </summary>
/// <param name="sourceName">Tag selecting the data source; compared against the <c>InterimSaCommon</c> tags.</param>
/// <param name="fileName">Path of the input file. Nothing is done when it is null or empty.</param>
private void CreateSvTsv(string sourceName, string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(fileName);
    string dataSource;

    switch (sourceName)
    {
        case InterimSaCommon.DgvTag:
            dataSource = "DGV";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(), SaTsvCommon.DgvSchemaVersion, InterimSaCommon.DgvTag, ReportFor.StructuralVariants))
            {
                var reader = new DgvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetDgvItems(), writer);
            }
            break;

        case InterimSaCommon.ClinGenTag:
            dataSource = "ClinGen";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(), SaTsvCommon.ClinGenSchemaVersion, InterimSaCommon.ClinGenTag, ReportFor.StructuralVariants))
            {
                var reader = new ClinGenReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetClinGenItems(), writer);
            }
            break;

        case InterimSaCommon.OnekSvTag:
            dataSource = "OnekSv";
            using (var writer = new IntervalTsvWriter(_outputDirectory, version, _genomeAssembly.ToString(), SaTsvCommon.OneKgenSchemaVersion, InterimSaCommon.OnekSvTag, ReportFor.StructuralVariants))
            {
                var reader = new OneKGenSvReader(new FileInfo(fileName), _refNamesDictionary);
                CreateSvTsv(reader.GetOneKGenSvItems(), writer);
            }
            break;

        default:
            // Nothing was written for an unrecognized source, so do not fall through and
            // report a completion with an empty data source name.
            Console.WriteLine("invalid source name");
            return;
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo(dataSource, version.Version, timeSpan);
}
/// <summary>
/// Reads the DGV input file and writes its items to an output file named
/// "{version.Name}_{version.Version}" plus <c>SaCommon.SiFileSuffix</c> in the output directory.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceStream = FileUtilities.GetReadStream(_compressedReference);
    var referenceProvider = new ReferenceSequenceProvider(referenceStream);

    // the data source version is read from a sibling ".version" file
    var version = DataSourceVersionReader.GetSourceVersion(_inputFileName + ".version");
    string outputBaseName = $"{version.Name}_{version.Version}";

    using (var dgvReader = new DgvReader(GZipUtilities.GetAppropriateStreamReader(_inputFileName), referenceProvider.RefNameToChromosome))
    {
        string outputPath = Path.Combine(_outputDirectory, outputBaseName + SaCommon.SiFileSuffix);

        using (var nsiStream = FileUtilities.GetCreateStream(outputPath))
        {
            var binaryWriter = new ExtendedBinaryWriter(nsiStream);

            using (var nsiWriter = new NsiWriter(binaryWriter, version, referenceProvider.Assembly, SaCommon.DgvTag, ReportFor.StructuralVariants, SaCommon.SchemaVersion))
            {
                nsiWriter.Write(dgvReader.GetItems());
            }
        }
    }

    return ExitCodes.Success;
}
/// <summary>
/// Parses a DGV line that ends after the sample-size column and checks that the JSON string
/// contains no observedGains, observedLosses or variantFreqAll entries.
/// </summary>
public void EmptyObservedLossesAndGains()
{
    const string inputLine = "nsv161172 1 88190 89153 CNV deletion Mills_et_al_2006 16902084 Sequencing nssv179750 M 24 ";
    const string expectedJson = "\"chromosome\":\"1\",\"begin\":88190,\"end\":89153,\"variantType\":\"copy_number_loss\",\"id\":\"nsv161172\",\"sampleSize\":24";

    var item = DgvReader.ExtractDgvItem(inputLine, ChromosomeUtilities.RefNameToChromosome);
    var observedJson = item.GetJsonString();

    Assert.Equal(expectedJson, observedJson);
}
/// <summary>
/// Parses a single DGV line describing a "complex" variant, converts it to a supplementary
/// interval and checks the interval's coordinates, type, source, id, frequency and counts.
/// </summary>
public void ExtractDgvComplex()
{
    const string inputLine = "esv2421662 1 12841928 12971833 OTHER complex Altshuler_et_al_2010 20811451 SNP array essv5038349,essv5012238 M 1184 20 70 HNRNPCL1,LOC649330,PRAMEF1,PRAMEF10,PRAMEF11,PRAMEF2,PRAMEF4 NA10838,NA10847";

    var item = DgvReader.ExtractDgvItem(inputLine, _renamer);
    Assert.True(item.IsInterval);

    var interval = item.GetSupplementaryInterval(_renamer);

    // coordinates
    Assert.Equal(12841928, interval.Start);
    Assert.Equal(12971833, interval.End);

    // classification and identity
    var variantTypeName = interval.VariantType.ToString();
    Assert.Equal("complex_structural_alteration", variantTypeName);
    Assert.Equal("DGV", interval.Source);
    Assert.Equal("esv2421662", interval.StringValues["id"]);

    // frequency, compared after formatting to at most five decimals
    var formattedFrequency = interval.PopulationFrequencies["variantFreqAll"].ToString("0.#####");
    Assert.Equal("0.07601", formattedFrequency);

    // counts
    Assert.Equal(1184, interval.IntValues["sampleSize"]);
    Assert.Equal(70, interval.IntValues["observedLosses"]);
    Assert.Equal(20, interval.IntValues["observedGains"]);
}
/// <summary>
/// Parses a single DGV line describing a CNV "loss", converts it to a supplementary interval
/// and checks its fields; the zero observed-gains column must not produce an "observedGains" key.
/// </summary>
public void ExtractDgvCnv()
{
    const string inputLine = "nsv482937 1 1 2300000 CNV loss Iafrate_et_al_2004 15286789 BAC aCGH,FISH nssv2995976 M 39 0 1 ACAP3,AGRN,WASH7P ";

    var item = DgvReader.ExtractDgvItem(inputLine, _renamer);
    Assert.True(item.IsInterval);

    var interval = item.GetSupplementaryInterval(_renamer);

    // coordinates
    Assert.Equal(1, interval.Start);
    Assert.Equal(2300000, interval.End);

    // classification and identity
    var variantTypeName = interval.VariantType.ToString();
    Assert.Equal("copy_number_loss", variantTypeName);
    Assert.Equal("DGV", interval.Source);
    Assert.Equal("nsv482937", interval.StringValues["id"]);

    // frequency, compared after formatting to at most five decimals
    var formattedFrequency = interval.PopulationFrequencies["variantFreqAll"].ToString("0.#####");
    Assert.Equal("0.02564", formattedFrequency);

    // counts
    Assert.Equal(39, interval.IntValues["sampleSize"]);
    Assert.Equal(1, interval.IntValues["observedLosses"]);

    var hasObservedGains = interval.IntValues.ContainsKey("observedGains");
    Assert.False(hasObservedGains);
}
/// <summary>
/// Parses a DGV line that ends after the sample-size column and checks that the resulting
/// supplementary interval has no observedGains, observedLosses or variantFreqAll entries.
/// </summary>
public void EmptyObservedLossesAndGains()
{
    const string inputLine = "nsv161172 1 88190 89153 CNV deletion Mills_et_al_2006 16902084 Sequencing nssv179750 M 24 ";

    var item = DgvReader.ExtractDgvItem(inputLine, _renamer);
    Assert.True(item.IsInterval);

    var interval = item.GetSupplementaryInterval(_renamer);

    // location
    Assert.Equal("1", interval.ReferenceName);
    Assert.Equal(88190, interval.Start);
    Assert.Equal(89153, interval.End);

    // classification and identity
    var variantTypeName = interval.VariantType.ToString();
    Assert.Equal("copy_number_loss", variantTypeName);
    Assert.Equal("DGV", interval.Source);
    Assert.Equal("nsv161172", interval.StringValues["id"]);

    // only the sample size is present
    Assert.Equal(24, interval.IntValues["sampleSize"]);

    var hasObservedGains = interval.IntValues.ContainsKey("observedGains");
    Assert.False(hasObservedGains);

    var hasObservedLosses = interval.IntValues.ContainsKey("observedLosses");
    Assert.False(hasObservedLosses);

    var hasFrequency = interval.PopulationFrequencies.ContainsKey("variantFreqAll");
    Assert.False(hasFrequency);
}
/// <summary>
/// Despite its name, this test exercises <c>DgvReader</c>: the reader built over
/// <c>TestDgvFile</c> is enumerated directly and must be sequence-equal to the items
/// returned by <c>CreateTruthDgvItemSequence</c>.
/// </summary>
public void TestDbSnpReader()
{
    var reader = new DgvReader(TestDgvFile, _renamer);
    var expectedItems = CreateTruthDgvItemSequence();

    var itemsMatch = reader.SequenceEqual(expectedItems);

    Assert.True(itemsMatch);
}
/// <summary>
/// Despite its name, this test exercises <c>DgvReader</c>: the items returned by
/// <c>GetDgvItems</c> for <c>TestDgvFile</c> must be sequence-equal to the items
/// returned by <c>CreateTruthDgvItemSequence</c>.
/// </summary>
public void TestDbSnpReader()
{
    var reader = new DgvReader(TestDgvFile, RefChromDict);

    // the observed sequence is requested before the truth set, as in the original expression
    var observedItems = reader.GetDgvItems();
    var expectedItems = CreateTruthDgvItemSequence();

    Assert.True(observedItems.SequenceEqual(expectedItems));
}
/// <summary>
/// constructor: loads the compressed reference, optionally restricts processing to a chromosome
/// white list, and opens one enumerator per supplied data source. Every enumerator is advanced to
/// its first item; enumerators that have no items are dropped from the list.
/// </summary>
/// <param name="compressedReferencePath">Path of the compressed reference file.</param>
/// <param name="nsdBaseFileName">Stored in <c>_nsdBaseFileName</c>.</param>
/// <param name="dbSnpFileName">Optional dbSNP input file.</param>
/// <param name="cosmicVcfFile">Optional COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given.</param>
/// <param name="cosmicTsvFile">Optional COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given.</param>
/// <param name="clinVarFileName">Optional ClinVar XML file; its items are fully read and sorted up front.</param>
/// <param name="oneKGenomeAfFileName">Optional 1000 Genomes allele-frequency file.</param>
/// <param name="evsFileName">Optional EVS input file.</param>
/// <param name="exacFileName">Optional ExAC input file.</param>
/// <param name="customFiles">Optional custom annotation files.</param>
/// <param name="dgvFileName">Optional DGV input file.</param>
/// <param name="oneKSvFileName">Optional 1000 Genomes SV file; its source version is only added when no allele-frequency file was given.</param>
/// <param name="clinGenFileName">Optional ClinGen input file.</param>
/// <param name="chrWhiteList">Optional comma-separated list of reference names to restrict processing to.</param>
public CreateSupplementaryDatabase(
    string compressedReferencePath,
    string nsdBaseFileName,
    string dbSnpFileName = null,
    string cosmicVcfFile = null,
    string cosmicTsvFile = null,
    string clinVarFileName = null,
    string oneKGenomeAfFileName = null,
    string evsFileName = null,
    string exacFileName = null,
    List<string> customFiles = null,
    string dgvFileName = null,
    string oneKSvFileName = null,
    string clinGenFileName = null,
    string chrWhiteList = null)
{
    _nsdBaseFileName = nsdBaseFileName;
    _dataSources = new List<DataSourceVersion>();

    _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
    _supplementaryIntervalList = new List<SupplementaryInterval>();

    Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

    _compressedSequence = new CompressedSequence();
    var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);
    _renamer = _compressedSequence.Renamer;
    _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

    if (!string.IsNullOrEmpty(chrWhiteList))
    {
        Console.WriteLine("Creating SA for the following chromosomes only:");
        foreach (var refSeq in chrWhiteList.Split(','))
        {
            // NOTE(review): the else branch below sets ChromosomeWhiteList to null; if an earlier
            // construction took that branch, this Add would dereference null — confirm.
            InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
            Console.Write(refSeq + ",");
        }
        Console.WriteLine();
    }
    else
    {
        InputFileParserUtilities.ChromosomeWhiteList = null;
    }

    if (dbSnpFileName != null)
    {
        AddSourceVersion(dbSnpFileName);

        var dbSnpReader = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
        var dbSnpEnumerator = dbSnpReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dbSnpEnumerator);
    }

    if (cosmicVcfFile != null && cosmicTsvFile != null)
    {
        AddSourceVersion(cosmicVcfFile);

        var cosmicReader = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
        var cosmicEnumerator = cosmicReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(cosmicEnumerator);
    }

    if (oneKGenomeAfFileName != null)
    {
        AddSourceVersion(oneKGenomeAfFileName);

        var oneKGenReader = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
        var oneKGenEnumerator = oneKGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenEnumerator);
    }

    if (oneKSvFileName != null)
    {
        // the source version is only added here when the allele-frequency file did not already add one
        if (oneKGenomeAfFileName == null)
        {
            AddSourceVersion(oneKSvFileName);
        }

        var oneKGenSvReader = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
        var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
    }

    if (evsFileName != null)
    {
        AddSourceVersion(evsFileName);

        var evsReader = new EvsReader(new FileInfo(evsFileName), _renamer);
        var evsEnumerator = evsReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(evsEnumerator);
    }

    if (exacFileName != null)
    {
        AddSourceVersion(exacFileName);

        var exacReader = new ExacReader(new FileInfo(exacFileName), _renamer);
        var exacEnumerator = exacReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(exacEnumerator);
    }

    if (clinVarFileName != null)
    {
        AddSourceVersion(clinVarFileName);

        var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

        // ClinVar items are materialized and sorted before being enumerated
        var clinVarList = clinVarReader.ToList();
        clinVarList.Sort();
        Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

        IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinVarEnumerator);
    }

    if (dgvFileName != null)
    {
        AddSourceVersion(dgvFileName);

        var dgvReader = new DgvReader(new FileInfo(dgvFileName), _renamer);
        var dgvEnumerator = dgvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dgvEnumerator);
    }

    if (clinGenFileName != null)
    {
        AddSourceVersion(clinGenFileName);

        var clinGenReader = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
        var clinGenEnumerator = clinGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinGenEnumerator);
    }

    if (customFiles != null)
    {
        foreach (var customFile in customFiles)
        {
            AddSourceVersion(customFile);

            var customReader = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
            var customEnumerator = customReader.GetEnumerator();
            _iSupplementaryDataItemList.Add(customEnumerator);
        }
    }

    // initializing the IEnumerators in the list: advance each one to its first item and drop the
    // empty ones. RemoveAll is used because removing from the list inside a foreach over that same
    // list throws InvalidOperationException; the predicate still calls MoveNext once per enumerator.
    _iSupplementaryDataItemList.RemoveAll(dataEnumerator => !dataEnumerator.MoveNext());

    _additionalItemsList = new List<SupplementaryDataItem>();
}