/// <summary>
/// Stores a list of SNP results for a patient and links them to a newly created result collection.
/// </summary>
/// <param name="patient">Patient the results belong to; its id is written to every patient variant.</param>
/// <param name="file">Result file handed to <c>patientRepo.AddCollection</c> when the collection is created.</param>
/// <param name="resultedOn">Date written to <c>resulted_on</c> on every patient variant.</param>
/// <param name="snps">SNP results to store; one patient variant is created per entry.</param>
/// <returns>The collection returned by <c>patientRepo.AddCollection</c>.</returns>
public patient_result_collections AddSnps(patient patient, result_files file, DateTime resultedOn, List<SnpResult> snps)
{
    // Create a collection
    var collection = patientRepo.AddCollection(patient, file);

    // Create patient variant entries
    var variants = new List<patient_variants>();
    foreach (var snp in snps)
    {
        // The SNP position is passed twice, as the last two arguments of GetVariant.
        var variant = GetVariant(snp.RSID, "dbSNP", snp.Position, snp.Position);

        // NOTE(review): this assumes GetVariant never returns null and that every genotype
        // has at least two elements; a shorter genotype fails on Genotype[1] - confirm
        // against the parser that produces SnpResult.
        variants.Add(new patient_variants()
        {
            patient_id = patient.id,
            variant_type = Enums.PatientVariantType.SNP,
            reference_id = variant.id,
            resulted_on = resultedOn,
            value1 = snp.Genotype[0].ToString(),
            value2 = snp.Genotype[1].ToString()
        });
    }

    // Save the variants before linking: the members below read each variant's id
    // (presumably assigned by the database on save - TODO confirm).
    entities.patient_variants.AddRange(variants);
    entities.SaveChanges();

    // Finally, link the collection to the SNPs. The members are added with a single
    // AddRange call, the same way the variants are added above, instead of one Add per row.
    var members = new List<patient_result_members>();
    foreach (var variant in variants)
    {
        members.Add(new patient_result_members()
        {
            collection_id = collection.id,
            member_id = variant.id,
            member_type = Enums.ResultMemberType.Variant,
        });
    }

    entities.patient_result_members.AddRange(members);
    entities.SaveChanges();

    return collection;
}
/// <summary>
/// Stores a list of star variant results for a patient and links them to a newly created result collection.
/// </summary>
/// <param name="patient">Patient the results belong to; its id is written to every patient variant.</param>
/// <param name="file">Result file handed to <c>patientRepo.AddCollection</c> when the collection is created.</param>
/// <param name="resultedOn">Date written to <c>resulted_on</c> on every patient variant.</param>
/// <param name="stars">Star variant results to store; one patient variant is created per entry.</param>
/// <returns>The collection returned by <c>patientRepo.AddCollection</c>.</returns>
public patient_result_collections AddStarVariants(patient patient, result_files file, DateTime resultedOn, List<StarVariantResult> stars)
{
    // Create a collection
    var collection = patientRepo.AddCollection(patient, file);

    // Create patient variant entries
    var variants = new List<patient_variants>();
    foreach (var star in stars)
    {
        // The return value of AddGene is not used here; the call itself is kept
        // (presumably it makes sure the gene exists before the variant is added - TODO confirm).
        AddGene(star.Gene, star.Gene, null, null, null);
        var variant = AddVariant(star.Gene, star.Result, "Star Variant", null, null, null, null, null);

        // Split the result on '*', dropping empty pieces, to get the two stored values.
        // NOTE(review): a result with fewer than two non-empty pieces fails on splitStars[1],
        // and any text between the stars stays attached to the first piece - confirm the
        // expected format of StarVariantResult.Result.
        string[] splitStars = star.Result.Split(new string[] { "*" }, StringSplitOptions.RemoveEmptyEntries);

        variants.Add(new patient_variants()
        {
            patient_id = patient.id,
            variant_type = Enums.PatientVariantType.StarVariant,
            reference_id = variant.id,
            resulted_on = resultedOn,
            value1 = splitStars[0],
            value2 = splitStars[1]
        });
    }

    // Save the variants before linking: the members below read each variant's id
    // (presumably assigned by the database on save - TODO confirm).
    entities.patient_variants.AddRange(variants);
    entities.SaveChanges();

    // Finally, link the collection to the stars. The members are added with a single
    // AddRange call, the same way the variants are added above, instead of one Add per row.
    var members = new List<patient_result_members>();
    foreach (var variant in variants)
    {
        members.Add(new patient_result_members()
        {
            collection_id = collection.id,
            member_id = variant.id,
            member_type = Enums.ResultMemberType.Variant,
        });
    }

    entities.patient_result_members.AddRange(members);
    entities.SaveChanges();

    return collection;
}
/// <summary>
/// Loads a VCF file. The "individual-id" header line is used to add the patient through
/// <c>patientRepo.AddPatient</c>, every other header line is recorded as collection-level
/// information, and each record in the file becomes a patient variant with its own list of
/// information entries.
/// </summary>
/// <param name="filePath">Path of the VCF file to load.</param>
/// <exception cref="FormatException">
/// The "individual-id" header line lacks a required field, its Dbxref field does not have two
/// ':'-separated parts, or one of the dates cannot be parsed.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The header has no "reference" or "fileDate" line (raised by <c>First</c>).
/// </exception>
public override void LoadData(string filePath)
{
    var vcfParser = new VCFParser(filePath);
    var header = vcfParser.Header;
    var collectionInformationList = new List<patient_variant_information>();

    // NOTE(review): if the file has no individual-id header line, this empty instance (and
    // whatever id it carries) is what the variants and the collection are created against -
    // confirm that this fallback is intended.
    var patient = new patient();

    // We pull out all of the metadata from the header (all lines) and write them as information
    // lines associated with this result.
    foreach (var headerItem in header.MetaDataInInputOrder)
    {
        if (headerItem.Key == "individual-id")
        {
            // The value is a comma-separated list of name=value fields wrapped in angle brackets.
            var individualParts = headerItem.Value.Replace("<", "").Replace(">", "").Split(new char[] { ',' });
            var individualData = individualParts.Select(x => x.Split(new char[] { '=' })).ToArray();

            // Index the fields by name; the first occurrence of a name wins.
            var individualFields = new Dictionary<string, string>();
            foreach (var field in individualData)
            {
                if (field.Length > 1 && !individualFields.ContainsKey(field[0]))
                {
                    individualFields.Add(field[0], field[1]);
                }
            }

            // Report a missing field by name instead of failing with a NullReferenceException.
            foreach (var requiredField in new string[] { "Dbxref", "First_name", "Last_name", "DOB" })
            {
                if (!individualFields.ContainsKey(requiredField))
                {
                    throw new FormatException(string.Format("The VCF individual-id header line is missing the required field '{0}'.", requiredField));
                }
            }

            var mrnParts = individualFields["Dbxref"].Split(':');
            if (mrnParts.Length < 2)
            {
                throw new FormatException("The Dbxref field of the VCF individual-id header line must contain two ':'-separated parts.");
            }

            // The DOB comes from the file, so it is parsed with the invariant culture (as the
            // file date is below) rather than with the culture of the machine running the import.
            patient = patientRepo.AddPatient(
                mrnParts[1],
                mrnParts[0],
                individualFields["First_name"],
                individualFields["Last_name"],
                DateTime.Parse(individualFields["DOB"], CultureInfo.InvariantCulture));
        }
        else if (headerItem.GetType() == typeof(VCFInfoHeaderLine))
        {
            var info = headerItem as VCFInfoHeaderLine;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("INFO", info.ToString())));
        }
        else if (headerItem.GetType() == typeof(VCFFilterHeaderLine))
        {
            var filter = headerItem as VCFFilterHeaderLine;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("FILTER", filter.ToString())));
        }
        else if (headerItem.GetType() == typeof(VCFFormatHeaderLine))
        {
            var format = headerItem as VCFFormatHeaderLine;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("FORMAT", format.ToString())));
        }
        else
        {
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), headerItem.Value));
        }
    }

    var reference = header.MetaDataInInputOrder.First(x => x.Key == "reference").Value;
    DateTime? resultDate = DateTime.ParseExact(header.MetaDataInInputOrder.First(x => x.Key == "fileDate").Value, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);

    // Build the reference variant, the patient variant and the information entries for each record.
    var patientVariants = new List<patient_variants>();
    var featureInformationList = new Dictionary<patient_variants, List<patient_variant_information>>();
    while (vcfParser.MoveNext())
    {
        var current = vcfParser.Current;
        var variant = variantRepo.AddVariant(null, current.ID, "dbSNP", current.Chr, current.Start, current.End, reference, current.Reference.BaseString);
        var patientVariant = new patient_variants()
        {
            patient_id = patient.id,
            reference_id = variant.id,
            resulted_on = resultDate,
            variant_type = Enums.PatientVariantType.SNP
        };
        SetVariantValues(patientVariant, current);
        patientVariants.Add(patientVariant);

        var attributeList = new List<patient_variant_information>();
        foreach (var attribute in current.Attributes)
        {
            attributeList.Add(AddVariantInformation(string.Format("VCF:{0}", attribute.Key), attribute.Value.ToString()));
        }

        if (current.FiltersMaybeNull != null)
        {
            foreach (var filter in current.FiltersMaybeNull)
            {
                attributeList.Add(AddVariantInformation("VCF:Filter", filter));
            }
        }

        foreach (var genotype in current.Genotypes)
        {
            attributeList.Add(AddVariantInformation("VCF:Genotype", genotype.ToMHGRString()));
        }

        attributeList.Add(AddVariantInformation("VCF:Quality", current.PhredScaledQual.ToString()));

        // In addition to the per-filter entries above, the filters are also recorded once
        // as a single comma-joined "VCF:Filter" entry.
        attributeList.Add(AddVariantInformation("VCF:Filter", string.Join(",", current.Filters.ToArray())));
        featureInformationList.Add(patientVariant, attributeList);
    }

    // Save the collection to get its ID
    var source = sourceRepo.AddSource("VCF", "VCF file");
    var file = AddResultFile(filePath, source);
    var collection = patientRepo.AddCollection(patient, file);

    // Save the collection-level header data
    collectionInformationList.ForEach(x => x.item_id = collection.id);
    variantRepo.AddPatientVariantInformationList(collectionInformationList);

    variantRepo.AddPatientVariants(patientVariants);

    // Save the individual attributes associated with each feature.
    // Must be done after the patient variants are written to DB (above), since we
    // rely on the ID being set.
    foreach (var pair in featureInformationList)
    {
        foreach (var attribute in pair.Value)
        {
            attribute.item_id = pair.Key.id;
        }

        variantRepo.AddPatientVariantInformationList(pair.Value);
    }

    variantRepo.AddPatientVariantsToCollection(collection, patientVariants);

    featureInformationList.Clear();
    collectionInformationList.Clear();
    patientVariants.Clear();
}
/// <summary>
/// Fills <c>value1</c> and <c>value2</c> of a patient variant from the alleles of the first
/// genotype in a VCF record. Only the first genotype is read.
/// </summary>
/// <param name="variant">Patient variant whose values are set.</param>
/// <param name="context">VCF record supplying the genotypes.</param>
private void SetVariantValues(patient_variants variant, VariantContext context)
{
    // Without any genotype there is nothing to copy; the variant is left untouched.
    if (context.Genotypes.Count < 1)
    {
        return;
    }

    var firstGenotypeAlleles = context.Genotypes[0].Alleles;
    variant.value1 = firstGenotypeAlleles[0].DisplayString;

    // When only one allele is present, value2 repeats value1.
    variant.value2 = firstGenotypeAlleles.Count > 1
        ? firstGenotypeAlleles[1].DisplayString
        : variant.value1;
}
/// <summary>
/// Loads a GVF file. Rows are sorted into pragmas, comments and features; the
/// "individual-id" pragma is used to add the patient through <c>patientRepo.AddPatient</c>,
/// the remaining pragmas and all comments are recorded as collection-level information, and
/// each feature becomes a patient variant with its own list of information entries.
/// </summary>
/// <param name="filePath">Path of the GVF file to load.</param>
/// <exception cref="FormatException">
/// The file contains features but no "individual-id" pragma, or one of the dates cannot be parsed.
/// </exception>
public override void LoadData(string filePath)
{
    string[] data = File.ReadAllLines(filePath);
    List<Pragma> pragmas = new List<Pragma>();
    List<string> comments = new List<string>();
    List<Feature> features = new List<Feature>();

    // Sort every row into its bucket; rows of any other type are ignored.
    foreach (var row in data)
    {
        switch (GVFParserHelper.GetRowType(row))
        {
            case GVFParserHelper.RowType.Pragma:
                {
                    pragmas.Add(pragmaParser.Parse(row));
                    break;
                }
            case GVFParserHelper.RowType.Comment:
                {
                    comments.Add(row.Trim(CommentTrimChars));
                    break;
                }
            case GVFParserHelper.RowType.Data:
                {
                    features.Add(GVFParserHelper.ParseFeature(row));
                    break;
                }
        }
    }

    var source = sourceRepo.AddSource("GVF", "GVF file");
    var file = AddResultFile(filePath, source);

    patient patient = null;
    var collectionInformationList = new List<patient_variant_information>();
    string genomeBuild = null;
    DateTime? resultDate = null;

    // Process the file-level pragmas
    foreach (var pragma in pragmas)
    {
        if (pragma.Name == "individual-id")
        {
            // NOTE(review): a missing Dbxref/First_name/Last_name/DOB tag, or a Dbxref without
            // a ':' separator, is not guarded against here - confirm the parser guarantees them.
            var mrnParts = pragma.Tags.FirstOrDefault(x => x.Name == "Dbxref").Value.Split(':');

            // Dates come from the file, so they are parsed with the invariant culture rather
            // than with the culture of the machine running the import.
            patient = patientRepo.AddPatient(
                mrnParts[1],
                mrnParts[0],
                pragma.Tags.FirstOrDefault(x => x.Name == "First_name").Value,
                pragma.Tags.FirstOrDefault(x => x.Name == "Last_name").Value,
                DateTime.Parse(pragma.Tags.FirstOrDefault(x => x.Name == "DOB").Value, System.Globalization.CultureInfo.InvariantCulture));
        }
        else if (pragma.Name == "phenotype-description")
        {
            // The looked-up phenotype is not used further in this method; the calls are kept
            // as they were (presumably for their effect on the phenotype store - TODO confirm).
            phenotypeRepo.GetPhenotypeByExternalId(CreatePhenotype(pragma));
        }
        else if (pragma.Name == "genome-build")
        {
            genomeBuild = pragma.Value;
        }
        else if (pragma.Name == "file-date")
        {
            resultDate = DateTime.Parse(pragma.Value, System.Globalization.CultureInfo.InvariantCulture);
        }
        else
        {
            // Any other pragma is recorded as information: one entry per tag when it has
            // tags, otherwise a single entry holding the pragma value.
            if (pragma.Tags.Count > 0)
            {
                foreach (var tag in pragma.Tags)
                {
                    collectionInformationList.Add(AddPragmaInformation(string.Format("GVF:{0}:{1}", pragma.Name, tag.Name), tag.Value));
                }
            }
            else
            {
                collectionInformationList.Add(AddPragmaInformation(string.Format("GVF:{0}", pragma.Name), pragma.Value));
            }
        }
    }

    // Convert all comments into individual variant information entries
    foreach (var comment in comments)
    {
        collectionInformationList.Add(AddPragmaInformation("GVF:Comment", comment));
    }

    // Every feature below is stored against the patient, so fail with a clear message here
    // instead of a NullReferenceException on patient.id inside the loop.
    if (patient == null && features.Count > 0)
    {
        throw new FormatException("The GVF file contains features but no individual-id pragma identifying the patient.");
    }

    // Go through the individual features and build up both reference variants and
    // the patient-level variants
    var patientVariants = new List<patient_variants>();
    var featureInformationList = new Dictionary<patient_variants, List<patient_variant_information>>();
    foreach (var feature in features)
    {
        var variant = variantRepo.AddVariant(
            null,
            feature.Attributes.FirstOrDefault(x => x.Name == "ID").Value,
            "dbSNP",
            feature.Chromosome,
            feature.StartPosition,
            feature.EndPosition,
            genomeBuild,
            feature.Attributes.FirstOrDefault(x => x.Name == "Reference_seq").Value);
        var patientVariant = new patient_variants()
        {
            patient_id = patient.id,
            reference_id = variant.id,
            resulted_on = resultDate,
            variant_type = Enums.PatientVariantType.SNP
        };
        SetVariantValues(
            patientVariant,
            feature.Attributes.FirstOrDefault(x => x.Name == "Variant_seq").Value,
            feature.Attributes.FirstOrDefault(x => x.Name == "Genotype").Value);
        patientVariants.Add(patientVariant);

        var attributeList = new List<patient_variant_information>();
        attributeList.Add(AddFeatureInformation("GVF:Score", feature.Score));
        attributeList.Add(AddFeatureInformation("GVF:Strand", feature.Strand));
        attributeList.Add(AddFeatureInformation("GVF:Phase", feature.Phase));

        // Variant_seq, Reference_seq and ID were already consumed above, so they are not
        // repeated as information entries.
        foreach (var attribute in feature.Attributes.Where(x => x.Name != "Variant_seq" && x.Name != "Reference_seq" && x.Name != "ID"))
        {
            attributeList.Add(AddFeatureInformation(string.Format("GVF:{0}", attribute.Name), attribute.Value));
        }

        featureInformationList.Add(patientVariant, attributeList);
    }

    // Save the collection to get its ID
    var collection = patientRepo.AddCollection(patient, file);

    // Save the collection-level pragma data
    collectionInformationList.ForEach(x => x.item_id = collection.id);
    variantRepo.AddPatientVariantInformationList(collectionInformationList);

    variantRepo.AddPatientVariants(patientVariants);

    // Save the individual attributes associated with each feature.
    // Must be done after the patient variants are written to DB (above), since we
    // rely on the ID being set.
    foreach (var pair in featureInformationList)
    {
        foreach (var attribute in pair.Value)
        {
            attribute.item_id = pair.Key.id;
        }

        variantRepo.AddPatientVariantInformationList(pair.Value);
    }

    variantRepo.AddPatientVariantsToCollection(collection, patientVariants);
}
/// <summary>
/// Fills <c>value1</c> and <c>value2</c> of a patient variant from a GVF variant sequence
/// string, according to the stated genotype.
/// </summary>
/// <param name="variant">Patient variant whose values are set.</param>
/// <param name="value">Variant sequence text; split on ',' for a heterozygous genotype.</param>
/// <param name="genotype">Genotype text; only "homozygous" and "heterozygous" are acted on.</param>
private void SetVariantValues(patient_variants variant, string value, string genotype)
{
    switch (genotype)
    {
        case "homozygous":
            {
                // Both values carry the same sequence.
                variant.value1 = value;
                variant.value2 = value;
                break;
            }
        case "heterozygous":
            {
                // The first two comma-separated pieces become value1 and value2.
                string[] sequences = value.Split(',');
                variant.value1 = sequences[0];
                variant.value2 = sequences[1];
                break;
            }
        // Any other genotype (including null) leaves the variant untouched.
    }
}