Пример #1
0
        // Builds a VCFParser over VcfFile, stores it in Vcp and asserts that the
        // stored parser is not null.
        public void WhenIOpenTheFile()
        {
            var openedParser = new VCFParser(VcfFile);

            Vcp = openedParser;
            Assert.IsNotNull(Vcp);
        }
Пример #2
0
        // Parses the VCF file at fileName into Repository.parsedVariants, timing the parse
        // with _parserSTW. Returns true on success. Returns false when ValidateFileName
        // rejects the name, or when Parse() throws (the failure is announced through Herald).
        private bool tmpLoadVCF(string fileName)
        {
            // ValidateFileName may rewrite fileName through its out parameter.
            bool nameIsValid = ValidateFileName(fileName, out fileName);
            if (!nameIsValid)
            {
                return false;
            }

            _parserSTW.Restart();

            var vcfParser = new VCFParser<Variant, VariantData>(
                source: fileName,
                species: Genomes.HomoSapiens,
                assembly: Assemblies.hg19);

            try
            {
                Repository.parsedVariants = vcfParser.Parse();
            }
            catch (Exception e)
            {
                // A dedicated message is used when the input sits in the working directory and
                // its extension equals _logFile; otherwise the exception message is relayed.
                bool inWorkingDirectory =
                    Path.GetDirectoryName(fileName) + Path.DirectorySeparatorChar == _workingDirectory;
                bool matchesLogFile = inWorkingDirectory && Path.GetExtension(fileName) == _logFile;

                string announcement = matchesLogFile
                    ? string.Format("The requested extension should not have same extension as the log file.")
                    : string.Format("{0}", e.Message);

                Herald.Announce(Herald.MessageType.Error, announcement);
                return false;
            }

            _parserSTW.Stop();

            // The stopwatch is stopped, so Elapsed is stable for both uses below.
            var elapsed = _parserSTW.Elapsed;
            _accumulatedLoadET += elapsed.TotalSeconds;
            Herald.AnnounceExeReport("Loaded", new ExecutionReport(Repository.parsedVariants.intervalsCount, elapsed));

            return true;
        }
        // Applies the single variant from chr_1_one_heterozygous_synonymous.vcf to a
        // one-transcript gene model and checks that the protein sequence is unchanged while
        // the SILENT class, the HETEROZYGOUS genotype and the position "1:69666" appear in the
        // protein name and in the first line of the written FASTA file.
        public void OneTranscriptOneHeterozygousSynonymous()
        {
            string testDir = TestContext.CurrentContext.TestDirectory;

            var genome = new Genome(Path.Combine(testDir, "TestData", "chr1_sample.fa"));
            var vcfRecords = new VCFParser(Path.Combine(testDir, "TestVcfs", "chr_1_one_heterozygous_synonymous.vcf"));
            List<Variant> variants = vcfRecords
                .Select(record => new Variant(null, record, genome))
                .ToList();

            Assert.AreEqual(1, variants.Count);

            var geneModel = new GeneModel(genome, Path.Combine(testDir, "TestData", "chr1_one_transcript.gtf"));
            List<Protein> referenceProteins = geneModel.Translate(true).ToList();
            List<Transcript> variantTranscripts = geneModel.ApplyVariants(variants);
            List<Protein> variantProteins = variantTranscripts
                .Select(transcript => transcript.Protein())
                .ToList();

            Assert.AreEqual(1, geneModel.Genes.Count);
            Assert.AreEqual(1, variantProteins.Count);
            Assert.AreEqual(1, referenceProteins.Count);

            // A synonymous change must leave the translated sequence identical, so the set of
            // distinct sequences across both proteins has exactly one member.
            var distinctSequences = new HashSet<string>
            {
                variantProteins[0].BaseSequence,
                referenceProteins[0].BaseSequence
            };
            Assert.AreEqual(1, distinctSequences.Count);

            string silentTag = FunctionalClass.SILENT.ToString();
            string heterozygousTag = GenotypeType.HETEROZYGOUS.ToString();

            Assert.IsTrue(variantProteins.Any(p => p.FullName.Contains(silentTag)));       // synonymous
            Assert.IsTrue(variantProteins.Any(p => p.FullName.Contains(heterozygousTag))); // heterozygous
            Assert.IsTrue(variantProteins.Any(p => p.FullName.Contains("1:69666")));

            // Round-trip through the FASTA writer and inspect the first line of the output.
            string proteinFasta = Path.Combine(testDir, "TestVcfs", "chr_1_one_heterozygous_synonymous.fasta");
            ProteinDbWriter.WriteFastaDatabase(variantProteins, proteinFasta, " ");

            string[] fastaLines = File.ReadLines(proteinFasta).ToArray();
            string firstLine = fastaLines[0];
            Assert.IsTrue(firstLine.Contains(silentTag)); // synonymous
            Assert.IsTrue(firstLine.Contains("1:69666"));
        }
 // Smoke test without assertions: loads the chromosome 19 genome and gene model, keeps only
 // variants whose reference and second allele strings are both one character long, applies
 // them and translates every resulting transcript.
 public void Chr19VariantTranscript()
 {
     string testDir = TestContext.CurrentContext.TestDirectory;
     string genomePath = Path.Combine(testDir, "Homo_sapiens.GRCh38.dna.chromosome.19.fa");
     string modelPath = Path.Combine(testDir, "ProblematicChr19", "chr19variantTranscript.gff3");
     string vcfPath = Path.Combine(testDir, "ProblematicChr19", "chr19problematic.vcf");

     var genome = new Genome(genomePath);
     var geneModel = new GeneModel(genome, modelPath);

     var singleBaseVariants = new VCFParser(vcfPath)
         .Select(record => new Variant(null, record, genome.Chromosomes[0]))
         .Where(variant => variant.SecondAlleleString.Length == 1 && variant.ReferenceAlleleString.Length == 1)
         .ToList();

     List<Transcript> transcripts = geneModel.ApplyVariants(singleBaseVariants).ToList();
     List<Protein> proteins = transcripts.Select(transcript => transcript.Protein(null)).ToList();
 }
Пример #5
0
        // Verifies that the fixture file exists, reads the first record (and its Genotypes),
        // then prints how many elements the NoCallCount projection over the parser yields.
        public void ParseVcf([Values("testData/NA12878.knowledgebase.snapshot.20131119.b37.vcf.gz")] string fname)
        {
            bool fixtureExists = File.Exists(fname);
            Assert.IsTrue(fixtureExists);

            var parser = new VCFParser(fname);

            var firstRecord = parser.First();
            // The value is not used afterwards; the property is still read, as before.
            var firstGenotypes = firstRecord.Genotypes;

            var projectedCount = parser
                .Select(record => record.NoCallCount)
                .Count();

            Console.WriteLine("Count: {0}", projectedCount);
        }
Пример #6
0
        // Times how long it takes to open the sample VCF, read the first record (and its
        // Genotypes) and count the NoCallCount projection, then prints the elapsed time.
        static void Main(string[] args)
        {
            fname = "testData/NA12878.knowledgebase.snapshot.20131119.b37.vcf.gz";

            var timer = System.Diagnostics.Stopwatch.StartNew();

            var parser = new VCFParser(fname);
            var firstRecord = parser.First();
            var firstGenotypes = firstRecord.Genotypes;
            var projectedCount = parser.Select(record => record.NoCallCount).Count();

            timer.Stop();

            Console.WriteLine(timer.Elapsed.ToString());
        }
Пример #7
0
		// Times opening the sample VCF, reading the first record's genotype count and counting
		// the NoCallCount projection; prints both counts and the elapsed time, then waits for
		// a key press before returning.
		static void Main (string[] args)
		{
			string fname = "testData/NA12878.knowledgebase.snapshot.20131119.b37.vcf.gz";

			var timer = System.Diagnostics.Stopwatch.StartNew ();

			var parser = new VCFParser (fname);
			var firstRecord = parser.First ();
			var genotypeCount = firstRecord.Genotypes.Count;
			var projectedCount = parser.Select (record => record.NoCallCount).Count ();

			timer.Stop ();

			Console.WriteLine ("Count: {0}/{1}", genotypeCount, projectedCount);
			Console.WriteLine ("Elapsed: {0}", timer.Elapsed);

			// Keep the console window open until a key is pressed.
			Console.ReadKey ();
		}
        // Builds a three-base ("AAA") single-exon, single-CDS transcript, applies the variant
        // from ugh.vcf and checks that the codon becomes AGA (K -> R) and that the resulting
        // transcript carries a MISSENSE annotation.
        public void TestMissenseMutation()
        {
            // Minimal transcript: one gene, one exon and one CDS spanning positions 1..3.
            byte[] bases = "AAA".Select(symbol => (byte)symbol).ToArray();
            var dna = new Sequence(Alphabets.DNA, bases, false);
            dna.ID = "1";

            var chromosome = new Chromosome(dna, null);
            var gene = new Gene("", chromosome, "", "+", 1, 3, null);
            var transcript = new Transcript("", gene, "", "+", 1, 3, "", null, null);
            var exon = new Exon(transcript, dna, "", 1, 3, dna.ID, "+", null, null);
            transcript.Exons = new List<Exon> { exon };

            var codingRegion = new CDS(transcript, dna.ID, "", "+", 1, 3, null, 0);
            transcript.CodingDomainSequences = new List<CDS> { codingRegion };

            // ugh.vcf holds a homozygous variation that should change the codon from AAA to AGA,
            // which code for K and R:
            // # CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample
            // 1   2 .   A   G   64.77 . info   GT:AD:DP:GQ:PL  1/1:2,3:5:69:93,0,69
            string vcfPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestVcfs", "ugh.vcf");
            List<Variant> variants = new VCFParser(vcfPath)
                .Select(record => new Variant(null, record, new Chromosome(dna, null)))
                .ToList();

            // Attach the variants and expand the transcript into its variant forms.
            transcript.Variants = new HashSet<Variant>(variants);
            List<Transcript> variantTranscripts = GeneModel.ApplyVariantsCombinitorially(transcript);

            // The original transcript must be untouched.
            Assert.AreEqual("AAA", SequenceExtensions.ConvertToString(transcript.Exons[0].Sequence));
            Assert.AreEqual("K", transcript.Protein().BaseSequence);

            // The first variant transcript must carry the substituted base and residue.
            var mutated = variantTranscripts[0];
            Assert.AreEqual("AGA", SequenceExtensions.ConvertToString(mutated.Exons[0].Sequence));
            Assert.AreEqual("R", mutated.Protein().BaseSequence);

            // ...and be annotated as a missense mutation.
            string missenseTag = FunctionalClass.MISSENSE.ToString();
            Assert.IsTrue(mutated.VariantAnnotations.Any(annotation => annotation.Contains(missenseTag)));
        }
Пример #9
0
        // Loads the VCF file at filePath into the repositories:
        //   1. walks the header, registering the patient from the "individual-id" line and
        //      turning every other header line into a collection-level information row;
        //   2. walks the records, registering each variant and building a patient_variants row
        //      plus its information rows (attributes, filters, genotypes, quality);
        //   3. saves the source, file and collection, then the collection-level rows, the
        //      patient variants, the per-variant rows and finally the collection membership.
        // Throws if the header has no "reference" or "fileDate" entry (Enumerable.First), or if
        // the "individual-id" line lacks one of the fields read below.
        public override void LoadData(string filePath)
        {
            var parser = new VCFParser(filePath);
            var vcfHeader = parser.Header;
            var collectionInfo = new List<patient_variant_information>();
            var patient = new patient();

            // Every header line is kept as an information row, except "individual-id", which
            // identifies the patient instead.
            foreach (var line in vcfHeader.MetaDataInInputOrder)
            {
                if (line.Key == "individual-id")
                {
                    // Strip the angle brackets, then split "k=v,k=v,..." into [key, value] pairs.
                    var fields = line.Value.Replace("<", "").Replace(">", "")
                        .Split(new char[] { ',' })
                        .Select(part => part.Split(new char[] { '=' }))
                        .ToArray();
                    Func<string, string> fieldValue = name => fields.FirstOrDefault(pair => pair[0] == name)[1];

                    var mrnParts = fieldValue("Dbxref").Split(':');
                    patient = patientRepo.AddPatient(
                        mrnParts[1],
                        mrnParts[0],
                        fieldValue("First_name"),
                        fieldValue("Last_name"),
                        DateTime.Parse(fieldValue("DOB")));
                    continue;
                }

                string label = string.Format("VCF:{0}", line.Key);
                Type lineType = line.GetType();

                if (lineType == typeof(VCFInfoHeaderLine))
                {
                    collectionInfo.Add(AddHeaderInformation(label, CleanHeaderValue("INFO", (line as VCFInfoHeaderLine).ToString())));
                }
                else if (lineType == typeof(VCFFilterHeaderLine))
                {
                    collectionInfo.Add(AddHeaderInformation(label, CleanHeaderValue("FILTER", (line as VCFFilterHeaderLine).ToString())));
                }
                else if (lineType == typeof(VCFFormatHeaderLine))
                {
                    collectionInfo.Add(AddHeaderInformation(label, CleanHeaderValue("FORMAT", (line as VCFFormatHeaderLine).ToString())));
                }
                else
                {
                    collectionInfo.Add(AddHeaderInformation(label, line.Value));
                }
            }

            var reference = vcfHeader.MetaDataInInputOrder.First(meta => meta.Key == "reference").Value;
            var fileDateText = vcfHeader.MetaDataInInputOrder.First(meta => meta.Key == "fileDate").Value;
            DateTime? resultDate = DateTime.ParseExact(fileDateText, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);

            var patientVariants = new List<patient_variants>();
            var infoByVariant = new Dictionary<patient_variants, List<patient_variant_information>>();

            while (parser.MoveNext())
            {
                var record = parser.Current;

                var variant = variantRepo.AddVariant(
                    null, record.ID, "dbSNP",
                    record.Chr, record.Start, record.End,
                    reference, record.Reference.BaseString);

                var patientVariant = new patient_variants()
                {
                    patient_id = patient.id,
                    reference_id = variant.id,
                    resulted_on = resultDate,
                    variant_type = Enums.PatientVariantType.SNP
                };
                SetVariantValues(patientVariant, record);
                patientVariants.Add(patientVariant);

                var recordInfo = new List<patient_variant_information>();

                foreach (var entry in record.Attributes)
                {
                    recordInfo.Add(AddVariantInformation(string.Format("VCF:{0}", entry.Key), entry.Value.ToString()));
                }

                // One row per individual filter, when the record has any.
                if (record.FiltersMaybeNull != null)
                {
                    foreach (var filterName in record.FiltersMaybeNull)
                    {
                        recordInfo.Add(AddVariantInformation("VCF:Filter", filterName));
                    }
                }

                foreach (var genotype in record.Genotypes)
                {
                    recordInfo.Add(AddVariantInformation("VCF:Genotype", genotype.ToMHGRString()));
                }

                recordInfo.Add(AddVariantInformation("VCF:Quality", record.PhredScaledQual.ToString()));
                // In addition to the per-filter rows above, one row carries all filters joined.
                recordInfo.Add(AddVariantInformation("VCF:Filter", string.Join(",", record.Filters.ToArray())));

                infoByVariant.Add(patientVariant, recordInfo);
            }

            // The collection has to be saved first so that its ID is available.
            var source = sourceRepo.AddSource("VCF", "VCF file");
            var file = AddResultFile(filePath, source);
            var collection = patientRepo.AddCollection(patient, file);

            // Collection-level header rows.
            foreach (var row in collectionInfo)
            {
                row.item_id = collection.id;
            }
            variantRepo.AddPatientVariantInformationList(collectionInfo);
            variantRepo.AddPatientVariants(patientVariants);

            // Per-variant rows go in only after the patient variants were written (above),
            // because they rely on the variant ID being set.
            foreach (var entry in infoByVariant)
            {
                foreach (var row in entry.Value)
                {
                    row.item_id = entry.Key.id;
                }
                variantRepo.AddPatientVariantInformationList(entry.Value);
            }

            variantRepo.AddPatientVariantsToCollection(collection, patientVariants);

            infoByVariant.Clear();
            collectionInfo.Clear();
            patientVariants.Clear();
        }
Пример #10
0
 // Smoke test without assertions: reads every record of the sample VCF into a list.
 public void test()
 {
     string vcfPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"testData", @"NA12878.knowledgebase.snapshot.20131119.b37.vcf.gz");

     var parser = new VCFParser(vcfPath);
     List<VariantContext> context = parser
         .Select(record => record)
         .ToList();
 }
Пример #11
0
        // Loads the VCF file at filePath as a tree of result_entities:
        //   - a root entity for the file;
        //   - header entities under the root (INFO / FILTER / FORMAT lines get a parent entity
        //     with one child per field, "fileDate" becomes "Resulted on", everything else is a
        //     plain key/value entity), with "individual-id" used to register the patient;
        //   - per record, a variant entity under the root, a dbSNP entity under the variant,
        //     and attribute entities for position, reference base, quality, INFO and FILTER.
        // Everything is handed to entityRepo.AddVCF in a single call at the end.
        public override void LoadData(string filePath)
        {
            var parser = new VCFParser(filePath);
            var vcfHeader = parser.Header;
            var patient = new patient();

            var source = sourceRepo.AddSource("VCF", "VCF file");
            var file = AddResultFile(filePath, source);

            // Root of the entity tree for this file.
            var rootEntity = new result_entities()
            {
                attribute_id = EntityRepository.GetAttribute(null, null, "Variant Call Format result", null).id,
                result_file_id = file.id
            };

            // Header entities are created with patient id 0 because the patient may not be
            // known yet; the real id is stamped on after the loop.
            var headerEntities = new List<result_entities>();
            foreach (var line in vcfHeader.MetaDataInInputOrder)
            {
                if (line.Key == "individual-id")
                {
                    // Strip the angle brackets, then split "k=v,k=v,..." into [key, value] pairs.
                    var fields = line.Value.Replace("<", "").Replace(">", "")
                        .Split(new char[] { ',' })
                        .Select(part => part.Split(new char[] { '=' }))
                        .ToArray();
                    Func<string, string> fieldValue = name => fields.FirstOrDefault(pair => pair[0] == name)[1];

                    var mrnParts = fieldValue("Dbxref").Split(':');
                    patient = patientRepo.AddPatient(
                        mrnParts[1],
                        mrnParts[0],
                        fieldValue("First_name"),
                        fieldValue("Last_name"),
                        DateTime.Parse(fieldValue("DOB")));
                    continue;
                }

                if (line.Key == "fileDate")
                {
                    DateTime resultDate = DateTime.ParseExact(line.Value, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);
                    headerEntities.Add(CreateEntityAttribute("Resulted on", 0, file.id, rootEntity, resultDate.ToShortDateString()));
                    continue;
                }

                Type lineType = line.GetType();

                if (lineType == typeof(VCFInfoHeaderLine))
                {
                    var info = line as VCFInfoHeaderLine;
                    var infoEntity = CreateEntityAttribute("INFO", 0, file.id, rootEntity, null);
                    headerEntities.Add(infoEntity);
                    headerEntities.Add(CreateEntityAttribute("ID", 0, file.id, infoEntity, info.ID));
                    headerEntities.Add(CreateEntityAttribute("Number", 0, file.id, infoEntity, info.CountType.ToString()));
                    headerEntities.Add(CreateEntityAttribute("Type", 0, file.id, infoEntity, info.Type.ToString()));
                    headerEntities.Add(CreateEntityAttribute("Description", 0, file.id, infoEntity, info.Description));
                }
                else if (lineType == typeof(VCFFilterHeaderLine))
                {
                    var filterLine = line as VCFFilterHeaderLine;
                    var filterEntity = CreateEntityAttribute("FILTER", 0, file.id, rootEntity, null);
                    headerEntities.Add(filterEntity);
                    foreach (var field in filterLine.GenericFields())
                    {
                        headerEntities.Add(CreateEntityAttribute(field.Key, 0, file.id, filterEntity, field.Value));
                    }
                }
                else if (lineType == typeof(VCFFormatHeaderLine))
                {
                    var format = line as VCFFormatHeaderLine;
                    var formatEntity = CreateEntityAttribute("FORMAT", 0, file.id, rootEntity, null);
                    headerEntities.Add(formatEntity);
                    headerEntities.Add(CreateEntityAttribute("ID", 0, file.id, formatEntity, format.ID));
                    headerEntities.Add(CreateEntityAttribute("Number", 0, file.id, formatEntity, format.CountType.ToString()));
                    headerEntities.Add(CreateEntityAttribute("Type", 0, file.id, formatEntity, format.Type.ToString()));
                    headerEntities.Add(CreateEntityAttribute("Description", 0, file.id, formatEntity, format.Description));
                }
                else
                {
                    headerEntities.Add(CreateEntityAttribute(line.Key, 0, file.id, rootEntity, line.Value));
                }
            }

            // Now that the patient is known, stamp it on the root and on every header entity.
            rootEntity.patient_id = patient.id;
            foreach (var headerEntity in headerEntities)
            {
                headerEntity.patient_id = patient.id;
            }

            var variantEntities = new List<result_entities>();
            while (parser.MoveNext())
            {
                var record = parser.Current;

                // Container entity for this record, attached to the file root.
                var variantEntity = new result_entities()
                {
                    attribute_id = EntityRepository.GetAttribute(null, null, "Variant Call Format variant", null).id,
                    result_file_id = file.id,
                    patient_id = patient.id,
                    parent = rootEntity
                };
                variantEntities.Add(variantEntity);

                // dbSNP entity for the record ID, attached to the container.
                var snpEntity = new result_entities()
                {
                    attribute_id = EntityRepository.GetAttribute(record.ID, "dbSNP", null, null).id,
                    result_file_id = file.id,
                    patient_id = patient.id,
                    parent = variantEntity
                };
                variantEntities.Add(snpEntity);

                SetVariantValues(record, patient.id, file.id, snpEntity, variantEntities);

                variantEntities.Add(CreateEntityAttribute("Chromosome", patient.id, file.id, variantEntity, record.Chr));
                variantEntities.Add(CreateEntityAttribute("Start position", patient.id, file.id, variantEntity, record.Start.ToString()));
                variantEntities.Add(CreateEntityAttribute("End position", patient.id, file.id, variantEntity, record.End.ToString()));
                variantEntities.Add(CreateEntityAttribute("Reference base", patient.id, file.id, variantEntity, record.Reference.BaseString));
                variantEntities.Add(CreateEntityAttribute("Quality", patient.id, file.id, variantEntity, record.PhredScaledQual.ToString()));

                foreach (var entry in record.Attributes)
                {
                    variantEntities.Add(CreateEntityAttribute(string.Format("INFO:{0}", entry.Key), patient.id, file.id, variantEntity, entry.Value.ToString()));
                }

                // Filters are stored as name-only entities with an empty value.
                if (record.FiltersMaybeNull != null)
                {
                    foreach (var filterName in record.FiltersMaybeNull)
                    {
                        variantEntities.Add(CreateEntityAttribute(string.Format("FILTER:{0}", filterName), patient.id, file.id, variantEntity, string.Empty));
                    }
                }
            }

            entityRepo.AddVCF(rootEntity, headerEntities, variantEntities);
        }