// Writes the custom items read from customCosmic.vcf into a supplementary
// annotation file, reads position 69224 back, and compares every expected
// custom item with the one observed at the same index.
public void ReadWriteCustomAnnotation()
{
    var outputPath = GetRandomPath(true);

    // data source versions handed to the writer
    var versions = new List<DataSourceVersion>
    {
        new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks),
        new DataSourceVersion("ClinVar", "13.5", DateTime.Parse("2015-01-19").Ticks)
    };

    var vcfFile = new FileInfo(Resources.TopPath("customCosmic.vcf"));
    var items = new CustomAnnotationReader(vcfFile, _renamer).ToList();

    var expected = new SupplementaryAnnotationPosition(69224);
    var creator = new SupplementaryPositionCreator(expected);

    // The items in the file belong to different positions; attaching all of
    // them to one creator is fine for the purpose of this round-trip test.
    foreach (var item in items)
    {
        item.SetSupplementaryAnnotations(creator);
    }

    // the writer has to be disposed before the file is opened for reading
    using (var writer = new SupplementaryAnnotationWriter(outputPath, "chr1", versions))
    {
        writer.Write(creator, expected.ReferencePosition);
    }

    using (var reader = new SupplementaryAnnotationReader(outputPath))
    {
        var observed = reader.GetAnnotation(69224);
        Assert.NotNull(observed);

        for (var idx = 0; idx < expected.CustomItems.Count; ++idx)
        {
            var want = expected.CustomItems[idx];
            var got = observed.CustomItems[idx];

            Assert.Equal(want.Id, got.Id);
            Assert.Equal(want.AnnotationType, got.AnnotationType);
            Assert.Equal(want.IsAlleleSpecific, got.IsAlleleSpecific);
            Assert.True(want.StringFields.SequenceEqual(got.StringFields));

            // boolean fields are only compared when the expected item has any
            if (want.BooleanFields.Count <= 0)
            {
                continue;
            }

            Assert.True(want.BooleanFields.SequenceEqual(got.BooleanFields));
        }
    }
}
// Reads customCosmic.vcf and checks that each custom item it yields reports
// the "cosmic" annotation type.
public void BasicReaderTest()
{
    const string expectedType = "cosmic";

    var vcfPath = Resources.TopPath("customCosmic.vcf");
    var reader = new CustomAnnotationReader(new FileInfo(vcfPath), _refChromDict);

    foreach (var item in reader.GetCustomItems())
    {
        Assert.Equal(expectedType, item.AnnotationType);
    }
}
/// <summary>
/// Reads the custom annotation file <paramref name="fileName"/> and passes its
/// items to <c>TsvWriterUtilities.WriteSortedItems</c> together with a
/// <c>CustomAnnoTsvWriter</c>. Start and finish messages are written to the console.
/// </summary>
/// <param name="fileName">Path of the custom annotation file; the method returns immediately when it is null or empty.</param>
private void CreateCutomAnnoTsv(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return;
    }

    Console.WriteLine($"Creating TSV from {fileName}");

    var sourceVersion = DataSourceVersionReader.GetSourceVersion(fileName);
    var reader = new CustomAnnotationReader(new FileInfo(fileName), _refNamesDictionary);

    // Gather the writer's arguments in the order they were originally evaluated.
    var isPositional = reader.IsPositional;
    var referenceStream = FileUtilities.GetReadStream(_compressedReferencePath);
    var sequenceProvider = new ReferenceSequenceProvider(referenceStream);

    using (var tsvWriter = new CustomAnnoTsvWriter(sourceVersion, _outputDirectory, _genomeAssembly, isPositional, sequenceProvider))
    {
        TsvWriterUtilities.WriteSortedItems(reader.GetCustomItems(), tsvWriter);
    }

    Console.WriteLine($"Finished {fileName}");
}
// Checks the id, string fields and boolean fields of the first two custom
// items read from customCosmic.vcf.
public void StringFieldsTest()
{
    var customFile = new FileInfo(Resources.TopPath("customCosmic.vcf"));
    var customReader = new CustomAnnotationReader(customFile, _refChromDict);

    // number of items visited so far; also selects which expectations apply
    var i = 0;
    foreach (var customItem in customReader.GetCustomItems())
    {
        switch (i)
        {
            case 0:
                // first item
                Assert.Equal("COSM3677745", customItem.Id);
                Assert.Equal("OR4F5", customItem.StringFields["gene"]);
                Assert.Equal("+", customItem.StringFields["strand"]);
                Assert.Equal("c.134A>C", customItem.StringFields["cds"]);
                Assert.Equal("p.D45A", customItem.StringFields["aminoAcid"]);
                Assert.Equal("1", customItem.StringFields["count"]);
                break;

            case 1:
                // second item, which additionally carries a boolean field
                Assert.Equal("COSM911918", customItem.Id);
                Assert.Equal("OR4F5", customItem.StringFields["gene"]);
                Assert.Equal("+", customItem.StringFields["strand"]);
                Assert.Equal("c.255C>A", customItem.StringFields["cds"]);
                Assert.Equal("p.I85I", customItem.StringFields["aminoAcid"]);
                Assert.Equal("1", customItem.StringFields["count"]);
                Assert.Equal("inExomeTarget", customItem.BooleanFields[0]);
                break;
        }

        i++;
    }

    // The expectations above only run for items that are actually yielded, so
    // without this check the test would pass when the reader returns fewer
    // than two items (including none at all).
    Assert.True(i >= 2, "expected at least 2 custom items but read " + i);
}
/// <summary>
/// Loads the compressed reference and creates an item enumerator for every
/// data source whose file name was supplied. Each enumerator is then advanced
/// to its first item, and enumerators that have no items are removed.
/// </summary>
/// <param name="compressedReferencePath">Path of the compressed reference; opened with <c>FileUtilities.GetReadStream</c>.</param>
/// <param name="nsdBaseFileName">Stored in <c>_nsdBaseFileName</c>; not otherwise used by the constructor.</param>
/// <param name="dbSnpFileName">dbSNP input file, or null to skip this source.</param>
/// <param name="cosmicVcfFile">COSMIC VCF file; COSMIC is only read when <paramref name="cosmicTsvFile"/> is supplied as well.</param>
/// <param name="cosmicTsvFile">COSMIC TSV file; COSMIC is only read when <paramref name="cosmicVcfFile"/> is supplied as well.</param>
/// <param name="clinVarFileName">ClinVar XML input file, or null to skip this source. Its items are fully loaded and sorted.</param>
/// <param name="oneKGenomeAfFileName">Input file for <c>OneKGenReader</c>, or null to skip this source.</param>
/// <param name="evsFileName">Input file for <c>EvsReader</c>, or null to skip this source.</param>
/// <param name="exacFileName">Input file for <c>ExacReader</c>, or null to skip this source.</param>
/// <param name="customFiles">Custom annotation files, each read with <c>CustomAnnotationReader</c>; null to skip.</param>
/// <param name="dgvFileName">Input file for <c>DgvReader</c>, or null to skip this source.</param>
/// <param name="oneKSvFileName">Input file for <c>OneKGenSvReader</c>, or null to skip this source. Its source version is only registered when <paramref name="oneKGenomeAfFileName"/> is null.</param>
/// <param name="clinGenFileName">Input file for <c>ClinGenReader</c>, or null to skip this source.</param>
/// <param name="chrWhiteList">Comma-separated reference names to restrict the run to. When null or empty, <c>InputFileParserUtilities.ChromosomeWhiteList</c> is set to null.</param>
public CreateSupplementaryDatabase(
    string compressedReferencePath,
    string nsdBaseFileName,
    string dbSnpFileName = null,
    string cosmicVcfFile = null,
    string cosmicTsvFile = null,
    string clinVarFileName = null,
    string oneKGenomeAfFileName = null,
    string evsFileName = null,
    string exacFileName = null,
    List<string> customFiles = null,
    string dgvFileName = null,
    string oneKSvFileName = null,
    string clinGenFileName = null,
    string chrWhiteList = null)
{
    _nsdBaseFileName = nsdBaseFileName;
    _dataSources = new List<DataSourceVersion>();
    _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
    _supplementaryIntervalList = new List<SupplementaryInterval>();

    Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

    _compressedSequence = new CompressedSequence();
    var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);
    _renamer = _compressedSequence.Renamer;
    _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

    if (!string.IsNullOrEmpty(chrWhiteList))
    {
        Console.WriteLine("Creating SA for the following chromosomes only:");
        foreach (var refSeq in chrWhiteList.Split(','))
        {
            // NOTE(review): the else branch below sets this static to null; if it
            // is still null when a whitelist is supplied, Add will throw. Confirm
            // that it is initialized elsewhere before this point.
            InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
            Console.Write(refSeq + ",");
        }
        Console.WriteLine();
    }
    else
    {
        InputFileParserUtilities.ChromosomeWhiteList = null;
    }

    if (dbSnpFileName != null)
    {
        AddSourceVersion(dbSnpFileName);
        var dbSnpReader = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
        _iSupplementaryDataItemList.Add(dbSnpReader.GetEnumerator());
    }

    // COSMIC needs both of its input files
    if (cosmicVcfFile != null && cosmicTsvFile != null)
    {
        AddSourceVersion(cosmicVcfFile);
        var cosmicReader = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
        _iSupplementaryDataItemList.Add(cosmicReader.GetEnumerator());
    }

    if (oneKGenomeAfFileName != null)
    {
        AddSourceVersion(oneKGenomeAfFileName);
        var oneKGenReader = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
        _iSupplementaryDataItemList.Add(oneKGenReader.GetEnumerator());
    }

    if (oneKSvFileName != null)
    {
        // the version is registered from this file only when the
        // oneKGenomeAfFileName branch above has not already registered one
        if (oneKGenomeAfFileName == null)
        {
            AddSourceVersion(oneKSvFileName);
        }

        var oneKGenSvReader = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
        _iSupplementaryDataItemList.Add(oneKGenSvReader.GetEnumerator());
    }

    if (evsFileName != null)
    {
        AddSourceVersion(evsFileName);
        var evsReader = new EvsReader(new FileInfo(evsFileName), _renamer);
        _iSupplementaryDataItemList.Add(evsReader.GetEnumerator());
    }

    if (exacFileName != null)
    {
        AddSourceVersion(exacFileName);
        var exacReader = new ExacReader(new FileInfo(exacFileName), _renamer);
        _iSupplementaryDataItemList.Add(exacReader.GetEnumerator());
    }

    if (clinVarFileName != null)
    {
        AddSourceVersion(clinVarFileName);
        var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

        // ClinVar items are materialized and sorted before being enumerated
        var clinVarList = clinVarReader.ToList();
        clinVarList.Sort();
        Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

        IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
        _iSupplementaryDataItemList.Add(clinVarEnumerator);
    }

    if (dgvFileName != null)
    {
        AddSourceVersion(dgvFileName);
        var dgvReader = new DgvReader(new FileInfo(dgvFileName), _renamer);
        _iSupplementaryDataItemList.Add(dgvReader.GetEnumerator());
    }

    if (clinGenFileName != null)
    {
        AddSourceVersion(clinGenFileName);
        var clinGenReader = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
        _iSupplementaryDataItemList.Add(clinGenReader.GetEnumerator());
    }

    if (customFiles != null)
    {
        foreach (var customFile in customFiles)
        {
            AddSourceVersion(customFile);
            var customReader = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
            _iSupplementaryDataItemList.Add(customReader.GetEnumerator());
        }
    }

    // Advance every enumerator to its first item and drop the ones that have
    // none. Removing from the list inside a foreach over that same list throws
    // InvalidOperationException, so the list is walked by index instead; the
    // index only moves forward when the current entry is kept.
    var index = 0;
    while (index < _iSupplementaryDataItemList.Count)
    {
        if (_iSupplementaryDataItemList[index].MoveNext())
        {
            index++;
        }
        else
        {
            _iSupplementaryDataItemList.RemoveAt(index);
        }
    }

    _additionalItemsList = new List<SupplementaryDataItem>();
}