Пример #1
0
        /// <summary>
        /// Round-trip test: loads the custom items from customCosmic.vcf into a single
        /// supplementary annotation position, writes that position to a supplementary
        /// annotation file, reads it back, and checks that every custom item survived
        /// the write/read cycle unchanged.
        /// </summary>
        public void ReadWriteCustomAnnotation()
        {
            // single position used for creating, writing and reading back the annotation
            const int position = 69224;

            var randomPath = GetRandomPath(true);

            // create our expected data source versions
            var dbSnpVersion   = new DataSourceVersion("dbSNP", "142", DateTime.Parse("2015-01-02").Ticks);
            var clinVarVersion = new DataSourceVersion("ClinVar", "13.5", DateTime.Parse("2015-01-19").Ticks);

            var expectedDataSourceVersions = new List<DataSourceVersion>
            {
                dbSnpVersion, clinVarVersion
            };

            var customFile   = new FileInfo(Resources.TopPath("customCosmic.vcf"));
            var customReader = new CustomAnnotationReader(customFile, _renamer);

            // all items from this file should be of type cosmic.
            var customItems = customReader.ToList();

            var sa        = new SupplementaryAnnotationPosition(position);
            var saCreator = new SupplementaryPositionCreator(sa);

            foreach (var customItem in customItems)
            {
                // NOTE that the two custom items are for different position, but for the purpose of our test, this is not an issue.
                customItem.SetSupplementaryAnnotations(saCreator);
            }

            // the above code was unit tested in MergeDbSnpClinVar()
            using (var writer = new SupplementaryAnnotationWriter(randomPath, "chr1", expectedDataSourceVersions))
            {
                writer.Write(saCreator, sa.ReferencePosition);
            }

            // read the supplementary annotation file
            using (var reader = new SupplementaryAnnotationReader(randomPath))
            {
                // extract the annotation stored at the position written above
                var observedAnnotation1 = reader.GetAnnotation(position);

                Assert.NotNull(observedAnnotation1);

                // the loop below is bounded by the expected count only, so make sure the
                // observed annotation does not carry missing or additional custom items
                Assert.Equal(sa.CustomItems.Count, observedAnnotation1.CustomItems.Count);

                for (var i = 0; i < sa.CustomItems.Count; i++)
                {
                    var expectedItem = sa.CustomItems[i];
                    var observedItem = observedAnnotation1.CustomItems[i];

                    Assert.Equal(expectedItem.Id, observedItem.Id);
                    Assert.Equal(expectedItem.AnnotationType, observedItem.AnnotationType);
                    Assert.Equal(expectedItem.IsAlleleSpecific, observedItem.IsAlleleSpecific);
                    Assert.True(expectedItem.StringFields.SequenceEqual(observedItem.StringFields));

                    // boolean fields are only compared when the expected item has any
                    if (expectedItem.BooleanFields.Count > 0)
                    {
                        Assert.True(expectedItem.BooleanFields.SequenceEqual(observedItem.BooleanFields));
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Reads customCosmic.vcf through <c>CustomAnnotationReader</c> and checks that
        /// every custom item it yields reports the "cosmic" annotation type.
        /// </summary>
        public void BasicReaderTest()
        {
            const string expectedAnnotationType = "cosmic";

            var reader = new CustomAnnotationReader(
                new FileInfo(Resources.TopPath("customCosmic.vcf")),
                _refChromDict);

            // each entry in this file is expected to carry the same annotation type
            foreach (var item in reader.GetCustomItems())
            {
                Assert.Equal(expectedAnnotationType, item.AnnotationType);
            }
        }
Пример #3
0
        /// <summary>
        /// Converts one custom annotation file into a TSV written through
        /// <c>CustomAnnoTsvWriter</c>. Does nothing when <paramref name="fileName"/>
        /// is null or empty. Progress is reported on the console.
        /// </summary>
        /// <param name="fileName">Path of the custom annotation file to convert.</param>
        private void CreateCutomAnnoTsv(string fileName)
        {
            // nothing to convert
            if (string.IsNullOrEmpty(fileName))
            {
                return;
            }

            Console.WriteLine($"Creating TSV from {fileName}");

            var sourceVersion    = DataSourceVersionReader.GetSourceVersion(fileName);
            var annotationReader = new CustomAnnotationReader(new FileInfo(fileName), _refNamesDictionary);

            // gather the writer's remaining inputs up front to keep the using statement readable
            var isPositional      = annotationReader.IsPositional;
            var referenceProvider = new ReferenceSequenceProvider(FileUtilities.GetReadStream(_compressedReferencePath));

            using (var tsvWriter = new CustomAnnoTsvWriter(sourceVersion, _outputDirectory, _genomeAssembly, isPositional, referenceProvider))
            {
                var items = annotationReader.GetCustomItems();
                TsvWriterUtilities.WriteSortedItems(items, tsvWriter);
            }

            Console.WriteLine($"Finished {fileName}");
        }
Пример #4
0
        /// <summary>
        /// Reads customCosmic.vcf through <c>CustomAnnotationReader</c> and verifies the
        /// id and the string/boolean fields of the first two custom items it yields.
        /// </summary>
        public void StringFieldsTest()
        {
            var customFile = new FileInfo(Resources.TopPath("customCosmic.vcf"));

            var customReader = new CustomAnnotationReader(customFile, _refChromDict);

            // zero-based index of the item being inspected; only items 0 and 1 are checked
            var itemIndex = 0;

            foreach (var customItem in customReader.GetCustomItems())
            {
                switch (itemIndex)
                {
                case 0:    //checking the first item
                    Assert.Equal("COSM3677745", customItem.Id);
                    Assert.Equal("OR4F5", customItem.StringFields["gene"]);
                    Assert.Equal("+", customItem.StringFields["strand"]);
                    Assert.Equal("c.134A>C", customItem.StringFields["cds"]);
                    Assert.Equal("p.D45A", customItem.StringFields["aminoAcid"]);
                    Assert.Equal("1", customItem.StringFields["count"]);
                    break;

                case 1:    //checking the second item
                    Assert.Equal("COSM911918", customItem.Id);
                    Assert.Equal("OR4F5", customItem.StringFields["gene"]);
                    Assert.Equal("+", customItem.StringFields["strand"]);
                    Assert.Equal("c.255C>A", customItem.StringFields["cds"]);
                    Assert.Equal("p.I85I", customItem.StringFields["aminoAcid"]);
                    Assert.Equal("1", customItem.StringFields["count"]);
                    Assert.Equal("inExomeTarget", customItem.BooleanFields[0]);
                    break;
                }

                itemIndex++;
            }

            // without this check the test would pass without running a single assertion
            // if the reader yielded fewer items than the two verified above
            Assert.True(itemIndex >= 2, "expected at least two custom items in customCosmic.vcf");
        }
Пример #5
0
        /// <summary>
        /// Constructor. Loads the compressed reference, optionally restricts processing to a
        /// chromosome white list, and creates one item enumerator per supplied data source.
        /// Every enumerator is advanced to its first item; enumerators that have no items
        /// are dropped from the list.
        /// </summary>
        /// <param name="compressedReferencePath">Path of the compressed reference sequence file.</param>
        /// <param name="nsdBaseFileName">Stored in <c>_nsdBaseFileName</c> for later use.</param>
        /// <param name="dbSnpFileName">dbSNP input file; skipped when null.</param>
        /// <param name="cosmicVcfFile">COSMIC VCF input; only used when <paramref name="cosmicTsvFile"/> is also given.</param>
        /// <param name="cosmicTsvFile">COSMIC TSV input; only used when <paramref name="cosmicVcfFile"/> is also given.</param>
        /// <param name="clinVarFileName">ClinVar XML input; skipped when null.</param>
        /// <param name="oneKGenomeAfFileName">1000 Genomes allele frequency input; skipped when null.</param>
        /// <param name="evsFileName">EVS input file; skipped when null.</param>
        /// <param name="exacFileName">ExAC input file; skipped when null.</param>
        /// <param name="customFiles">Custom annotation input files; skipped when null.</param>
        /// <param name="dgvFileName">DGV input file; skipped when null.</param>
        /// <param name="oneKSvFileName">1000 Genomes structural variant input; skipped when null.</param>
        /// <param name="clinGenFileName">ClinGen input file; skipped when null.</param>
        /// <param name="chrWhiteList">Comma-separated reference names to restrict processing to; null or empty clears the white list.</param>
        public CreateSupplementaryDatabase(
            string compressedReferencePath,
            string nsdBaseFileName,
            string dbSnpFileName        = null,
            string cosmicVcfFile        = null,
            string cosmicTsvFile        = null,
            string clinVarFileName      = null,
            string oneKGenomeAfFileName = null,
            string evsFileName          = null,
            string exacFileName         = null,
            List<string> customFiles    = null,
            string dgvFileName          = null,
            string oneKSvFileName       = null,
            string clinGenFileName      = null,
            string chrWhiteList         = null)
        {
            _nsdBaseFileName = nsdBaseFileName;
            _dataSources     = new List<DataSourceVersion>();

            _iSupplementaryDataItemList = new List<IEnumerator<SupplementaryDataItem>>();
            _supplementaryIntervalList  = new List<SupplementaryInterval>();

            Console.WriteLine("Creating supplementary annotation files... Data version: {0}, schema version: {1}", SupplementaryAnnotationCommon.DataVersion, SupplementaryAnnotationCommon.SchemaVersion);

            _compressedSequence = new CompressedSequence();
            var compressedSequenceReader = new CompressedSequenceReader(FileUtilities.GetReadStream(compressedReferencePath), _compressedSequence);

            _renamer         = _compressedSequence.Renamer;
            _dataFileManager = new DataFileManager(compressedSequenceReader, _compressedSequence);

            if (!string.IsNullOrEmpty(chrWhiteList))
            {
                // NOTE(review): the else branch below sets ChromosomeWhiteList to null; confirm it is
                // re-initialized elsewhere before Add() is reached here on a later construction.
                Console.WriteLine("Creating SA for the following chromosomes only:");
                foreach (var refSeq in chrWhiteList.Split(','))
                {
                    InputFileParserUtilities.ChromosomeWhiteList.Add(_renamer.GetEnsemblReferenceName(refSeq));
                    Console.Write(refSeq + ",");
                }
                Console.WriteLine();
            }
            else
            {
                InputFileParserUtilities.ChromosomeWhiteList = null;
            }

            if (dbSnpFileName != null)
            {
                AddSourceVersion(dbSnpFileName);

                var dbSnpReader     = new DbSnpReader(new FileInfo(dbSnpFileName), _renamer);
                var dbSnpEnumerator = dbSnpReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dbSnpEnumerator);
            }

            // the merged COSMIC reader needs both the VCF and the TSV file
            if (cosmicVcfFile != null && cosmicTsvFile != null)
            {
                AddSourceVersion(cosmicVcfFile);

                var cosmicReader     = new MergedCosmicReader(cosmicVcfFile, cosmicTsvFile, _renamer);
                var cosmicEnumerator = cosmicReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(cosmicEnumerator);
            }

            if (oneKGenomeAfFileName != null)
            {
                AddSourceVersion(oneKGenomeAfFileName);

                var oneKGenReader     = new OneKGenReader(new FileInfo(oneKGenomeAfFileName), _renamer);
                var oneKGenEnumerator = oneKGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenEnumerator);
            }

            if (oneKSvFileName != null)
            {
                // the source version is only registered from the SV file when the
                // allele frequency file above did not already register one
                if (oneKGenomeAfFileName == null)
                {
                    AddSourceVersion(oneKSvFileName);
                }

                var oneKGenSvReader     = new OneKGenSvReader(new FileInfo(oneKSvFileName), _renamer);
                var oneKGenSvEnumerator = oneKGenSvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(oneKGenSvEnumerator);
            }

            if (evsFileName != null)
            {
                AddSourceVersion(evsFileName);

                var evsReader     = new EvsReader(new FileInfo(evsFileName), _renamer);
                var evsEnumerator = evsReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(evsEnumerator);
            }

            if (exacFileName != null)
            {
                AddSourceVersion(exacFileName);

                var exacReader     = new ExacReader(new FileInfo(exacFileName), _renamer);
                var exacEnumerator = exacReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(exacEnumerator);
            }

            if (clinVarFileName != null)
            {
                AddSourceVersion(clinVarFileName);

                var clinVarReader = new ClinVarXmlReader(new FileInfo(clinVarFileName), compressedSequenceReader, _compressedSequence);

                // ClinVar items are fully materialized and sorted before being enumerated
                var clinVarList = clinVarReader.ToList();

                clinVarList.Sort();
                Console.WriteLine($"{clinVarList.Count} clinvar items read form XML file");

                IEnumerator<ClinVarItem> clinVarEnumerator = clinVarList.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinVarEnumerator);
            }

            if (dgvFileName != null)
            {
                AddSourceVersion(dgvFileName);

                var dgvReader     = new DgvReader(new FileInfo(dgvFileName), _renamer);
                var dgvEnumerator = dgvReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(dgvEnumerator);
            }

            if (clinGenFileName != null)
            {
                AddSourceVersion(clinGenFileName);

                var clinGenReader     = new ClinGenReader(new FileInfo(clinGenFileName), _renamer);
                var clinGenEnumerator = clinGenReader.GetEnumerator();
                _iSupplementaryDataItemList.Add(clinGenEnumerator);
            }

            if (customFiles != null)
            {
                foreach (var customFile in customFiles)
                {
                    AddSourceVersion(customFile);

                    var customReader     = new CustomAnnotationReader(new FileInfo(customFile), _renamer);
                    var customEnumerator = customReader.GetEnumerator();
                    _iSupplementaryDataItemList.Add(customEnumerator);
                }
            }

            // Initialize the enumerators: advance each one to its first item and drop those
            // that have none. RemoveAll is used because removing from the list inside a
            // foreach over the same list throws InvalidOperationException.
            _iSupplementaryDataItemList.RemoveAll(dataEnumerator => !dataEnumerator.MoveNext());

            _additionalItemsList = new List<SupplementaryDataItem>();
        }