/// <summary>
/// Reads the cosm5428243 VCF/TSV pair through <c>MergedCosmicReader</c> and checks the
/// study id, sites and histologies reported for the first two COSMIC items.
/// </summary>
public void TwoStudyCosmicCoding()
{
    // Dispose both readers even when an assertion throws; disposing a StreamReader twice is
    // harmless, so this is safe even if the merged reader also closes them.
    using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.vcf")))
    using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.tsv")))
    {
        var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

        var count = 0;
        foreach (var cosmicItem in cosmicReader.GetCosmicItems())
        {
            switch (count)
            {
                case 0:
                    foreach (var study in cosmicItem.Studies)
                    {
                        Assert.Equal("544", study.Id);
                        Assert.Equal(new[] { "haematopoietic_and_lymphoid_tissue" }, study.Sites);
                        Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies);
                    }
                    break;

                case 1:
                    foreach (var study in cosmicItem.Studies)
                    {
                        Assert.Equal("544", study.Id);
                        Assert.Equal(new[] { "haematopoietic;lymphoid_tissue" }, study.Sites);
                        Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies);
                    }
                    break;
            }

            count++;
        }

        // Both switch cases must have been reached; otherwise the test would pass
        // without checking anything when the reader yields fewer than two items.
        Assert.True(count >= 2, "expected at least two COSMIC items");
    }
}
/// <summary>
/// Walks the first two items produced by <c>MergedCosmicReader</c> for the cosm5428243
/// VCF/TSV pair and checks the id, primary site and histology of every study on each item.
/// </summary>
public void TwoStudyCosmicCoding()
{
    var cosmicReader = new MergedCosmicReader(Resources.TopPath("cosm5428243.vcf"), Resources.TopPath("cosm5428243.tsv"), _renamer);

    // The using block guarantees the enumerator is disposed even when an assertion fails.
    using (var enumerator = cosmicReader.GetEnumerator())
    {
        // Fail with a clear message instead of dereferencing a stale/absent Current.
        Assert.True(enumerator.MoveNext(), "expected a first COSMIC item");
        var cosmicItem = enumerator.Current;
        foreach (var study in cosmicItem.Studies)
        {
            Assert.Equal("544", study.ID);
            Assert.Equal("haematopoietic_and_lymphoid_tissue", study.PrimarySite);
            Assert.Equal("haematopoietic_neoplasm", study.Histology);
        }

        Assert.True(enumerator.MoveNext(), "expected a second COSMIC item");
        cosmicItem = enumerator.Current;
        foreach (var study in cosmicItem.Studies)
        {
            Assert.Equal("544", study.ID);
            Assert.Equal("haematopoietic;lymphoid_tissue", study.PrimarySite);
            Assert.Equal("haematopoietic_neoplasm", study.Histology);
        }
    }
}
/// <summary>
/// Checks that merging COSM18152.vcf with COSM18152.tsv yields exactly one COSMIC item.
/// </summary>
public void CosmicAlleleSpecificIndel()
{
    // Dispose both readers deterministically, including when the assertion throws.
    using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.vcf")))
    using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.tsv")))
    {
        var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

        // Enumerate inside the using block so the readers are still open if the items are lazy.
        var items = cosmicReader.GetCosmicItems();
        Assert.Single(items);
    }
}
/// <summary>
/// Checks that merging COSM18152.vcf with COSM18152.tsv, backed by a test sequence
/// provider, yields exactly one item.
/// </summary>
public void CosmicAlleleSpecificIndel()
{
    // The sequence provider is built for 10188320 with "G" as the supplied sequence.
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(
        10188320,
        "G",
        'A',
        ChromosomeUtilities.RefNameToChromosome);

    var vcfPath = Resources.TopPath("COSM18152.vcf");
    var tsvPath = Resources.TopPath("COSM18152.tsv");
    var mergedReader = new MergedCosmicReader(vcfPath, tsvPath, sequenceProvider);

    var cosmicItems = mergedReader.GetItems();

    Assert.Single(cosmicItems);
}
/// <summary>
/// Checks that COSM983708 produces a single item whose JSON string contains
/// <c>"refAllele":"-"</c>.
/// </summary>
public void CosmicAltAllele()
{
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(
        6928019,
        "C",
        'A',
        ChromosomeUtilities.RefNameToChromosome);

    var vcfPath = Resources.TopPath("COSM983708.vcf");
    var tsvPath = Resources.TopPath("COSM983708.tsv");
    var mergedReader = new MergedCosmicReader(vcfPath, tsvPath, sequenceProvider);

    // Materialise once so the same items are counted and then inspected.
    var cosmicItems = mergedReader.GetItems().ToList();

    Assert.Single((IEnumerable)cosmicItems);

    var json = cosmicItems[0].GetJsonString();
    Assert.Contains("\"refAllele\":\"-\"", json);
}
/// <summary>
/// Checks that merging COSM983708.vcf with COSM983708.tsv yields a single item whose
/// JSON string contains <c>"refAllele":"C"</c>.
/// </summary>
public void CosmicAltAllele()
{
    // Dispose both readers deterministically, including when an assertion throws.
    using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.vcf")))
    using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.tsv")))
    {
        var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

        // Materialise while the readers are still open.
        var items = cosmicReader.GetCosmicItems().ToList();

        Assert.Single(items);
        Assert.Contains("\"refAllele\":\"C\"", items[0].GetJsonString());
    }
}
/// <summary>
/// Reads the COSMIC VCF/TSV pair and writes the merged items, plus an index, into
/// the output directory using <c>NsaWriter</c>.
/// </summary>
/// <returns><c>ExitCodes.Success</c> once the items have been written.</returns>
private static ExitCodes ProgramExecution()
{
    var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReference));
    var cosmicReader = new MergedCosmicReader(_vcfFile, _tsvFile, referenceProvider);

    // The data source version is read from "<vcf file>.version".
    var version = DataSourceVersionReader.GetSourceVersion(_vcfFile + ".version");

    // Output files are named "<name>_<version>" followed by the SA suffix (and the index suffix).
    string outFileName = $"{version.Name}_{version.Version}";
    string nsaFileName = outFileName + SaCommon.SaFileSuffix;
    string indexFileName = nsaFileName + SaCommon.IndexSufix;

    using (var nsaStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, nsaFileName)))
    using (var indexStream = FileUtilities.GetCreateStream(Path.Combine(_outputDirectory, indexFileName)))
    using (var nsaWriter = new NsaWriter(
        new ExtendedBinaryWriter(nsaStream),
        new ExtendedBinaryWriter(indexStream),
        version,
        referenceProvider,
        SaCommon.CosmicTag,
        false,
        true,
        SaCommon.SchemaVersion,
        false))
    {
        var cosmicItems = cosmicReader.GetItems();
        nsaWriter.Write(cosmicItems);
    }

    return ExitCodes.Success;
}
/// <summary>
/// Checks the id, sites and histologies of the first two studies attached to the first
/// item read from the cosm5428243 VCF/TSV pair.
/// </summary>
public void TwoStudyCosmicCoding()
{
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(
        35416,
        "A",
        'C',
        ChromosomeUtilities.RefNameToChromosome);

    var mergedReader = new MergedCosmicReader(
        Resources.TopPath("cosm5428243.vcf"),
        Resources.TopPath("cosm5428243.tsv"),
        sequenceProvider);

    var allItems = mergedReader.GetItems().ToList();
    var firstItem = allItems[0];
    var studies = firstItem.Studies.ToList();

    // NOTE(review): a previously disabled assertion also expected an
    // "acute myeloid leukaemia" histology entry; only "haematopoietic neoplasm" is checked now.
    var firstStudy = studies[0];
    Assert.Equal("544", firstStudy.Id);
    Assert.Equal(new[] { "haematopoietic and lymphoid tissue" }, firstStudy.Sites);
    Assert.Equal(new[] { "haematopoietic neoplasm" }, firstStudy.Histologies);

    var secondStudy = studies[1];
    Assert.Equal("544", secondStudy.Id);
    Assert.Equal(new[] { "haematopoietic;lymphoid tissue" }, secondStudy.Sites);
    Assert.Equal(new[] { "haematopoietic neoplasm" }, secondStudy.Histologies);
}
/// <summary>
/// Parses two VCF lines that share position 10188320 and checks that the extracted items
/// keep the reference allele, alternate allele and start exactly as written on the line.
/// </summary>
public void IndelWithNoLeadingBase()
{
    // The readers wrap empty streams; only ExtractCosmicItems is exercised here.
    var emptyTsvReader = new StreamReader(new MemoryStream());
    var emptyVcfReader = new StreamReader(new MemoryStream());
    var mergedReader = new MergedCosmicReader(emptyVcfReader, emptyTsvReader, _refChromDict);

    const string deletionLine = "3 10188320 COSM14426 GGTACTGAC A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
    const string snvLine = "3 10188320 COSM18152 G A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

    var deletionItems = mergedReader.ExtractCosmicItems(deletionLine);

    Assert.Equal("GGTACTGAC", deletionItems[0].ReferenceAllele);
    Assert.Equal("A", deletionItems[0].AlternateAllele);
    Assert.Equal(10188320, deletionItems[0].Start);

    var snvItems = mergedReader.ExtractCosmicItems(snvLine);

    Assert.Equal("G", snvItems[0].ReferenceAllele);
    Assert.Equal("A", snvItems[0].AlternateAllele);
    Assert.Equal(10188320, snvItems[0].Start);
}
/// <summary>
/// Parses two VCF lines that share position 10188320 and checks that the extracted items
/// keep the reference allele, alternate allele and position exactly as written on the line.
/// </summary>
public void IndelWithNoLeadingBase()
{
    var sequenceProvider = ParserTestUtils.GetSequenceProvider(
        10188320,
        "GGTACTGAC",
        'A',
        ChromosomeUtilities.RefNameToChromosome);

    // The files are supplied only so the reader can be constructed;
    // the test targets the parsing of individual VCF lines.
    var mergedReader = new MergedCosmicReader(
        Resources.TopPath("cosm5428243.vcf"),
        Resources.TopPath("cosm5428243.tsv"),
        sequenceProvider);

    const string deletionLine = "3 10188320 COSM14426 GGTACTGAC A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
    const string snvLine = "3 10188320 COSM18152 G A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

    var deletionItems = mergedReader.ExtractCosmicItems(deletionLine);

    Assert.Equal("GGTACTGAC", deletionItems[0].RefAllele);
    Assert.Equal("A", deletionItems[0].AltAllele);
    Assert.Equal(10188320, deletionItems[0].Position);

    var snvItems = mergedReader.ExtractCosmicItems(snvLine);

    Assert.Equal("G", snvItems[0].RefAllele);
    Assert.Equal("A", snvItems[0].AltAllele);
    Assert.Equal(10188320, snvItems[0].Position);
}
/// <summary>
/// Feeds two VCF lines at position 10188320 into a supplementary annotation position and
/// checks the resulting <c>SaAltAllele</c> values ("9A" for the first line, "A" for the second).
/// </summary>
public void IndelWithNoLeadingBase()
{
    var mergedReader = new MergedCosmicReader();

    const string deletionLine = "3 10188320 COSM14426 GGTACTGAC A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
    const string snvLine = "3 10188320 COSM18152 G A . . GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

    var annotationPosition = new SupplementaryAnnotationPosition(10188320);
    var positionCreator = new SupplementaryPositionCreator(annotationPosition);

    // First line: its items are added before the first assertion runs.
    foreach (var item in mergedReader.ExtractCosmicItems(deletionLine))
    {
        item.SetSupplementaryAnnotations(positionCreator);
    }

    Assert.Equal("9A", annotationPosition.CosmicItems[0].SaAltAllele);

    // Second line: checked at index 1 of the same annotation position.
    foreach (var item in mergedReader.ExtractCosmicItems(snvLine))
    {
        item.SetSupplementaryAnnotations(positionCreator);
    }

    Assert.Equal("A", annotationPosition.CosmicItems[1].SaAltAllele);
}
/// <summary>
/// Merges a COSMIC VCF file with its companion TSV file and writes the sorted items
/// through a <c>CosmicTsvWriter</c>, then reports the elapsed time.
/// </summary>
/// <param name="vcfFile">Path of the COSMIC VCF file; also used to look up the data source version.</param>
/// <param name="tsvFile">Path of the COSMIC TSV file.</param>
/// <remarks>Does nothing when either path is null or empty.</remarks>
private void CreateCosmicTsv(string vcfFile, string tsvFile)
{
    if (string.IsNullOrEmpty(tsvFile) || string.IsNullOrEmpty(vcfFile))
    {
        return;
    }

    var benchMark = new Benchmark();
    var version = DataSourceVersionReader.GetSourceVersion(vcfFile);

    // The input readers are now disposed together with the writer; previously they were left
    // open. They are declared after the writer so they are released first, once all items
    // have been written.
    using (var writer = new CosmicTsvWriter(version, _outputDirectory, _genomeAssembly, new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath))))
    using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(tsvFile))
    using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(vcfFile))
    {
        var reader = new MergedCosmicReader(vcfReader, tsvReader, _refNamesDictionary);
        TsvWriterUtilities.WriteSortedItems(reader.GetCosmicItems(), writer);
    }

    var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());
    TsvWriterUtilities.WriteCompleteInfo("COSMIC", version.Version, timeSpan);
}
/// <summary>
/// Constructor. Loads the compressed reference, optionally restricts processing to a
/// chromosome white list, creates one item enumerator per supplied data source and primes
/// each enumerator, dropping the ones that contain no items.
/// </summary>
/// <param name="compressedReferencePath">Path of the compressed reference sequence file.</param>
/// <param name="nsdBaseFileName">Stored as the base file name for the generated output.</param>
/// <param name="dbSnpFileName">Optional dbSNP input file.</param>
/// <param name="cosmicVcfFile">Optional COSMIC VCF file; only used when <paramref name="cosmicTsvFile"/> is also given.</param>
/// <param name="cosmicTsvFile">Optional COSMIC TSV file; only used when <paramref name="cosmicVcfFile"/> is also given.</param>
/// <param name="clinVarFileName">Optional ClinVar XML file; its items are fully read and sorted up front.</param>
/// <param name="oneKGenomeAfFileName">Optional 1000 Genomes allele frequency file.</param>
/// <param name="evsFileName">Optional EVS input file.</param>
/// <param name="exacFileName">Optional ExAC input file.</param>
/// <param name="customFiles">Optional list of custom annotation files.</param>
/// <param name="dgvFileName">Optional DGV input file.</param>
/// <param name="oneKSvFileName">Optional 1000 Genomes SV file; its source version is only recorded when no allele frequency file was given.</param>
/// <param name="clinGenFileName">Optional ClinGen input file.</param>
/// <param name="chrWhiteList">Optional comma-separated reference names; when null or empty the white list is reset to null.</param>
public CreateSupplementaryDatabase(
    string compressedReferencePath,
    string nsdBaseFileName,
    string dbSnpFileName = null,
    string cosmicVcfFile = null,
    string cosmicTsvFile = null,
    string clinVarFileName = null,
    string oneKGenomeAfFileName = null,
    string evsFileName = null,
    string exacFileName = null,
    List<string> customFiles = null,
    string dgvFileName = null,
    string oneKSvFileName = null,
    string clinGenFileName = null,
    string chrWhiteList = null)
{
    _nsdBaseFileName = nsdBaseFileName;
    _dataSources = new List<DataSourceVersion>();
    _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
    _supplementaryIntervalList = new List<SupplementaryInterval>();

    Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

    _compressedSequence = new CompressedSequence();
    var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);
    _renamer = _compressedSequence.Renamer;
    _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

    if (!string.IsNullOrEmpty(chrWhiteList))
    {
        // NOTE(review): this assumes the static white list is non-null here, although the
        // else branch below sets it to null - confirm it is re-initialised elsewhere.
        Console.WriteLine("Creating SA for the following chromosomes only:");
        foreach (var refSeq in chrWhiteList.Split(','))
        {
            InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
            Console.Write(refSeq + ",");
        }
        Console.WriteLine();
    }
    else
    {
        InputFileParserUtilities.ChromosomeWhiteList = null;
    }

    if (dbSnpFileName != null)
    {
        AddSourceVersion(dbSnpFileName);

        var dbSnpReader = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
        var dbSnpEnumerator = dbSnpReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dbSnpEnumerator);
    }

    if (cosmicVcfFile != null && cosmicTsvFile != null)
    {
        AddSourceVersion(cosmicVcfFile);

        var cosmicReader = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
        var cosmicEnumerator = cosmicReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(cosmicEnumerator);
    }

    if (oneKGenomeAfFileName != null)
    {
        AddSourceVersion(oneKGenomeAfFileName);

        var oneKGenReader = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
        var oneKGenEnumerator = oneKGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenEnumerator);
    }

    if (oneKSvFileName != null)
    {
        // The source version is recorded only when the allele frequency file did not already add one.
        if (oneKGenomeAfFileName == null)
        {
            AddSourceVersion(oneKSvFileName);
        }

        var oneKGenSvReader = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
        var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
    }

    if (evsFileName != null)
    {
        AddSourceVersion(evsFileName);

        var evsReader = new EvsReader(new FileInfo(evsFileName), _renamer);
        var evsEnumerator = evsReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(evsEnumerator);
    }

    if (exacFileName != null)
    {
        AddSourceVersion(exacFileName);

        var exacReader = new ExacReader(new FileInfo(exacFileName), _renamer);
        var exacEnumerator = exacReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(exacEnumerator);
    }

    if (clinVarFileName != null)
    {
        AddSourceVersion(clinVarFileName);

        var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

        // ClinVar items are materialised and sorted before being enumerated.
        var clinVarList = clinVarReader.ToList();
        clinVarList.Sort();
        Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

        IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinVarEnumerator);
    }

    if (dgvFileName != null)
    {
        AddSourceVersion(dgvFileName);

        var dgvReader = new DgvReader(new FileInfo(dgvFileName), _renamer);
        var dgvEnumerator = dgvReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(dgvEnumerator);
    }

    if (clinGenFileName != null)
    {
        AddSourceVersion(clinGenFileName);

        var clinGenReader = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
        var clinGenEnumerator = clinGenReader.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinGenEnumerator);
    }

    if (customFiles != null)
    {
        foreach (var customFile in customFiles)
        {
            AddSourceVersion(customFile);

            var customReader = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
            var customEnumerator = customReader.GetEnumerator();
            _iSupplementaryDataItemList.Add(customEnumerator);
        }
    }

    // Initialise the enumerators: advance each one to its first item and drop the ones that
    // have none. The loop walks a snapshot because removing from the list that a foreach is
    // currently enumerating throws InvalidOperationException.
    var enumeratorSnapshot = new List<IEnumerator<SupplementaryDataItem>>(_iSupplementaryDataItemList);
    foreach (var iDataEnumerator in enumeratorSnapshot)
    {
        if (!iDataEnumerator.MoveNext())
        {
            _iSupplementaryDataItemList.Remove(iDataEnumerator);
        }
    }

    _additionalItemsList = new List<SupplementaryDataItem>();
}