Example #1
0
        // Reads the cosm5428243 VCF/TSV test resources through MergedCosmicReader and checks the
        // study ID, sites and histologies of every study attached to the first two COSMIC items.
        public void TwoStudyCosmicCoding()
        {
            // Both readers are disposed when the block exits, including when an assertion throws.
            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.vcf")))
            using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("cosm5428243.tsv")))
            {
                var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

                var cosmicItems = cosmicReader.GetCosmicItems();
                var count       = 0;

                // NOTE(review): nothing below asserts that two items were actually produced, so an
                // empty result passes silently - confirm whether a count check is wanted.
                foreach (var cosmicItem in cosmicItems)
                {
                    switch (count)
                    {
                        case 0:
                            foreach (var study in cosmicItem.Studies)
                            {
                                Assert.Equal("544", study.Id);
                                Assert.Equal(new[] { "haematopoietic_and_lymphoid_tissue" }, study.Sites);
                                Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies);
                            }
                            break;

                        case 1:
                            foreach (var study in cosmicItem.Studies)
                            {
                                Assert.Equal("544", study.Id);
                                Assert.Equal(new[] { "haematopoietic;lymphoid_tissue" }, study.Sites);
                                Assert.Equal(new[] { "haematopoietic_neoplasm", "acute_myeloid_leukaemia" }, study.Histologies);
                            }
                            break;
                    }

                    count++;
                }
            }
        }
        // Steps through the first two items produced by MergedCosmicReader for the cosm5428243
        // resources and checks the ID, primary site and histology of each attached study.
        public void TwoStudyCosmicCoding()
        {
            var cosmicReader = new MergedCosmicReader(Resources.TopPath("cosm5428243.vcf"), Resources.TopPath("cosm5428243.tsv"), _renamer);

            // 'using' disposes the enumerator even when one of the assertions below throws;
            // the original only reached Dispose() on the success path.
            using (var enumerator = cosmicReader.GetEnumerator())
            {
                // Fail with a clear message if the reader runs dry instead of reading a stale Current.
                Assert.True(enumerator.MoveNext());
                var cosmicItem = enumerator.Current;

                foreach (var study in cosmicItem.Studies)
                {
                    Assert.Equal("544", study.ID);
                    Assert.Equal("haematopoietic_and_lymphoid_tissue", study.PrimarySite);
                    Assert.Equal("haematopoietic_neoplasm", study.Histology);
                }

                Assert.True(enumerator.MoveNext());
                cosmicItem = enumerator.Current;

                foreach (var study in cosmicItem.Studies)
                {
                    Assert.Equal("544", study.ID);
                    Assert.Equal("haematopoietic;lymphoid_tissue", study.PrimarySite);
                    Assert.Equal("haematopoietic_neoplasm", study.Histology);
                }
            }
        }
Example #3
0
        // Reads the COSM18152 VCF/TSV resources and expects exactly one COSMIC item.
        public void CosmicAlleleSpecificIndel()
        {
            // Dispose both readers once the items have been enumerated (or an assertion has failed).
            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.vcf")))
            using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM18152.tsv")))
            {
                var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);
                var items        = cosmicReader.GetCosmicItems();

                // Assert.Single enumerates the items, so it must run while the readers are still open.
                Assert.Single(items);
            }
        }
        // Builds a MergedCosmicReader over the COSM18152 resources, backed by a test sequence
        // provider created for position 10188320, and expects exactly one item from GetItems().
        public void CosmicAlleleSpecificIndel()
        {
            const int position = 10188320;

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(position, "G", 'A', ChromosomeUtilities.RefNameToChromosome);

            var vcfPath = Resources.TopPath("COSM18152.vcf");
            var tsvPath = Resources.TopPath("COSM18152.tsv");
            var reader  = new MergedCosmicReader(vcfPath, tsvPath, sequenceProvider);

            Assert.Single(reader.GetItems());
        }
        // Reads the COSM983708 resources and expects a single item whose JSON string
        // contains "refAllele":"-".
        public void CosmicAltAllele()
        {
            var sequenceProvider = ParserTestUtils.GetSequenceProvider(6928019, "C", 'A', ChromosomeUtilities.RefNameToChromosome);

            var vcfPath = Resources.TopPath("COSM983708.vcf");
            var tsvPath = Resources.TopPath("COSM983708.tsv");
            var reader  = new MergedCosmicReader(vcfPath, tsvPath, sequenceProvider);

            var cosmicItems = reader.GetItems().ToList();

            // The cast keeps the call bound to the non-generic Assert.Single overload.
            Assert.Single((IEnumerable)cosmicItems);

            var json = cosmicItems[0].GetJsonString();
            Assert.Contains("\"refAllele\":\"-\"", json);
        }
Example #6
0
        // Reads the COSM983708 VCF/TSV resources and expects a single item whose JSON string
        // contains "refAllele":"C".
        public void CosmicAltAllele()
        {
            // Dispose both readers when the test finishes, whether it passes or fails.
            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.vcf")))
            using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(Resources.TopPath("COSM983708.tsv")))
            {
                var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

                // ToList() materializes the items while the readers are still open.
                var items = cosmicReader.GetCosmicItems().ToList();

                Assert.Single(items);
                Assert.Contains("\"refAllele\":\"C\"", items[0].GetJsonString());
            }
        }
Example #7
0
        /// <summary>
        /// Reads the COSMIC VCF/TSV pair and writes the resulting items, together with an index,
        /// into the output directory using <see cref="NsaWriter"/>.
        /// </summary>
        /// <returns><see cref="ExitCodes.Success"/> when the write completes without throwing.</returns>
        private static ExitCodes ProgramExecution()
        {
            // The reference stream is owned here, so it is released once writing has finished
            // (the original never disposed it).
            using (var referenceStream = FileUtilities.GetReadStream(_compressedReference))
            {
                var referenceProvider = new ReferenceSequenceProvider(referenceStream);
                var cosmicReader      = new MergedCosmicReader(_vcfFile, _tsvFile, referenceProvider);
                var version           = DataSourceVersionReader.GetSourceVersion(_vcfFile + ".version");

                // Output files are named after the data source name and version.
                string outFileName = $"{version.Name}_{version.Version}";
                string nsaPath     = Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix);
                string indexPath   = Path.Combine(_outputDirectory, outFileName + SaCommon.SaFileSuffix + SaCommon.IndexSufix);

                using (var nsaStream = FileUtilities.GetCreateStream(nsaPath))
                using (var indexStream = FileUtilities.GetCreateStream(indexPath))
                using (var nsaWriter = new NsaWriter(new ExtendedBinaryWriter(nsaStream), new ExtendedBinaryWriter(indexStream), version, referenceProvider, SaCommon.CosmicTag, false, true, SaCommon.SchemaVersion, false))
                {
                    nsaWriter.Write(cosmicReader.GetItems());
                }
            }

            return ExitCodes.Success;
        }
        // Takes the first item produced for the cosm5428243 resources and checks the ID, sites and
        // histologies of its first two studies.
        public void TwoStudyCosmicCoding()
        {
            var sequenceProvider = ParserTestUtils.GetSequenceProvider(35416, "A", 'C', ChromosomeUtilities.RefNameToChromosome);
            var reader           = new MergedCosmicReader(Resources.TopPath("cosm5428243.vcf"), Resources.TopPath("cosm5428243.tsv"), sequenceProvider);

            var allItems = reader.GetItems().ToList();
            var studies  = allItems[0].Studies.ToList();

            // NOTE(review): the original carried disabled assertions that also expected
            // "acute myeloid leukaemia" among the histologies - confirm whether they should return.
            var firstStudy = studies[0];
            Assert.Equal("544", firstStudy.Id);
            Assert.Equal(new[] { "haematopoietic and lymphoid tissue" }, firstStudy.Sites);
            Assert.Equal(new[] { "haematopoietic neoplasm" }, firstStudy.Histologies);

            var secondStudy = studies[1];
            Assert.Equal("544", secondStudy.Id);
            Assert.Equal(new[] { "haematopoietic;lymphoid tissue" }, secondStudy.Sites);
            Assert.Equal(new[] { "haematopoietic neoplasm" }, secondStudy.Histologies);
        }
Example #9
0
        // Feeds two raw VCF lines to MergedCosmicReader.ExtractCosmicItems and checks the reference
        // allele, alternate allele and start position of the first item returned for each line.
        public void IndelWithNoLeadingBase()
        {
            // The readers only exist to satisfy the constructor; dispose them when the test ends.
            using (var tsvReader = new StreamReader(new MemoryStream()))
            using (var vcfReader = new StreamReader(new MemoryStream()))
            {
                var cosmicReader = new MergedCosmicReader(vcfReader, tsvReader, _refChromDict);

                const string vcfLine1 = "3	10188320	COSM14426	GGTACTGAC	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
                const string vcfLine2 = "3	10188320	COSM18152	G	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

                var items = cosmicReader.ExtractCosmicItems(vcfLine1);

                Assert.Equal("GGTACTGAC", items[0].ReferenceAllele);
                Assert.Equal("A", items[0].AlternateAllele);
                Assert.Equal(10188320, items[0].Start);

                var items2 = cosmicReader.ExtractCosmicItems(vcfLine2);

                Assert.Equal("G", items2[0].ReferenceAllele);
                Assert.Equal("A", items2[0].AlternateAllele);
                Assert.Equal(10188320, items2[0].Start);
            }
        }
        // Checks the RefAllele, AltAllele and Position that ExtractCosmicItems reports for two raw
        // VCF lines at position 10188320.
        public void IndelWithNoLeadingBase()
        {
            const int expectedPosition = 10188320;

            var sequenceProvider = ParserTestUtils.GetSequenceProvider(10188320, "GGTACTGAC", 'A', ChromosomeUtilities.RefNameToChromosome);

            // The resource files are only needed to construct the reader; the test itself targets
            // the parsing of individual VCF lines.
            var reader = new MergedCosmicReader(Resources.TopPath("cosm5428243.vcf"), Resources.TopPath("cosm5428243.tsv"), sequenceProvider);

            const string deletionLine = "3	10188320	COSM14426	GGTACTGAC	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
            const string snvLine      = "3	10188320	COSM18152	G	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

            var deletionItem = reader.ExtractCosmicItems(deletionLine)[0];

            Assert.Equal("GGTACTGAC", deletionItem.RefAllele);
            Assert.Equal("A", deletionItem.AltAllele);
            Assert.Equal(expectedPosition, deletionItem.Position);

            var snvItem = reader.ExtractCosmicItems(snvLine)[0];

            Assert.Equal("G", snvItem.RefAllele);
            Assert.Equal("A", snvItem.AltAllele);
            Assert.Equal(expectedPosition, snvItem.Position);
        }
        // Applies the items extracted from two raw VCF lines to one supplementary annotation
        // position and checks the SaAltAllele recorded for each ("9A", then "A").
        public void IndelWithNoLeadingBase()
        {
            var reader = new MergedCosmicReader();

            const string deletionLine = "3	10188320	COSM14426	GGTACTGAC	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.?;CNT=2";
            const string snvLine      = "3	10188320	COSM18152	G	A	.	.	GENE=VHL;STRAND=+;CDS=c.463G>A;AA=p.V155M;CNT=7";

            var saPosition      = new SupplementaryAnnotationPosition(10188320);
            var positionCreator = new SupplementaryPositionCreator(saPosition);

            var deletionItems = reader.ExtractCosmicItems(deletionLine);
            foreach (var item in deletionItems)
            {
                item.SetSupplementaryAnnotations(positionCreator);
            }

            Assert.Equal("9A", saPosition.CosmicItems[0].SaAltAllele);

            var snvItems = reader.ExtractCosmicItems(snvLine);
            foreach (var item in snvItems)
            {
                item.SetSupplementaryAnnotations(positionCreator);
            }

            Assert.Equal("A", saPosition.CosmicItems[1].SaAltAllele);
        }
        /// <summary>
        /// Merges a COSMIC VCF/TSV pair and writes the sorted items through a
        /// <see cref="CosmicTsvWriter"/>, then reports the source version and elapsed time.
        /// </summary>
        /// <param name="vcfFile">COSMIC VCF path; also passed to the data source version reader.</param>
        /// <param name="tsvFile">COSMIC TSV path.</param>
        /// <remarks>Returns without doing anything when either path is null or empty.</remarks>
        private void CreateCosmicTsv(string vcfFile, string tsvFile)
        {
            if (string.IsNullOrEmpty(tsvFile) || string.IsNullOrEmpty(vcfFile))
            {
                return;
            }

            var benchMark = new Benchmark();

            var version = DataSourceVersionReader.GetSourceVersion(vcfFile);

            // The reference stream and both input readers are released when the block exits;
            // the original left all three open. The writer is disposed before the reference stream.
            using (var referenceStream = FileUtilities.GetReadStream(_compressedReferencePath))
            using (var writer = new CosmicTsvWriter(version, _outputDirectory, _genomeAssembly, new ReferenceSequenceProvider(referenceStream)))
            using (var tsvReader = GZipUtilities.GetAppropriateStreamReader(tsvFile))
            using (var vcfReader = GZipUtilities.GetAppropriateStreamReader(vcfFile))
            {
                var reader = new MergedCosmicReader(vcfReader, tsvReader, _refNamesDictionary);

                TsvWriterUtilities.WriteSortedItems(reader.GetCosmicItems(), writer);
            }

            var timeSpan = Benchmark.ToHumanReadable(benchMark.GetElapsedTime());

            TsvWriterUtilities.WriteCompleteInfo("COSMIC", version.Version, timeSpan);
        }
Example #13
0
        /// <summary>
        /// Loads the compressed reference, applies the optional chromosome whitelist and registers
        /// one item enumerator per supplied data source. Every registered enumerator is advanced to
        /// its first item; sources that yield nothing are dropped from the list.
        /// </summary>
        /// <param name="compressedReferencePath">Path of the compressed reference opened for reading.</param>
        /// <param name="nsdBaseFileName">Stored in <c>_nsdBaseFileName</c>.</param>
        /// <param name="dbSnpFileName">Input for <see cref="DbSnpReader"/>; skipped when null.</param>
        /// <param name="cosmicVcfFile">COSMIC VCF file; COSMIC is only read when the TSV file is given as well.</param>
        /// <param name="cosmicTsvFile">COSMIC TSV file; COSMIC is only read when the VCF file is given as well.</param>
        /// <param name="clinVarFileName">Input for <see cref="ClinVarXmlReader"/>; its items are fully read and sorted up front. Skipped when null.</param>
        /// <param name="oneKGenomeAfFileName">Input for <see cref="OneKGenReader"/>; skipped when null.</param>
        /// <param name="evsFileName">Input for <see cref="EvsReader"/>; skipped when null.</param>
        /// <param name="exacFileName">Input for <see cref="ExacReader"/>; skipped when null.</param>
        /// <param name="customFiles">Inputs for <see cref="CustomAnnotationReader"/>, one reader per file; skipped when null.</param>
        /// <param name="dgvFileName">Input for <see cref="DgvReader"/>; skipped when null.</param>
        /// <param name="oneKSvFileName">Input for <see cref="OneKGenSvReader"/>; its source version is only registered when <paramref name="oneKGenomeAfFileName"/> is null. Skipped when null.</param>
        /// <param name="clinGenFileName">Input for <see cref="ClinGenReader"/>; skipped when null.</param>
        /// <param name="chrWhiteList">Comma-separated reference names to restrict processing to; null or empty clears the whitelist.</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    // the whitelist stores the Ensembl-style name returned by the renamer
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // COSMIC needs both the VCF and the TSV file
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the SV file's version is only registered when no allele-frequency file was supplied
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are materialized and sorted before being exposed as an enumerator
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read from XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);
                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Advance every enumerator to its first item and drop the ones that have none.
            // An index loop is used because removing from a List<T> inside a foreach over the same
            // list invalidates its enumerator and throws InvalidOperationException.
            for (var i = 0; i < _iSupplementaryDataItemList.Count;)
            {
                if (_iSupplementaryDataItemList[i].MoveNext())
                {
                    i++;
                }
                else
                {
                    // the next element shifts into slot i, so the index is not advanced
                    _iSupplementaryDataItemList.RemoveAt(i);
                }
            }

            _additionalItemsList = new List<SupplementaryDataItem>();
        }