/// <summary>
/// Reads the VCF file at <paramref name="filePath"/> and hands its content to the repositories:
/// the "individual-id" header line is passed to <c>patientRepo.AddPatient</c>, every other header
/// line becomes a collection-level information row, and every record becomes a patient variant
/// with its own list of information rows.
/// </summary>
/// <param name="filePath">Path of the VCF file to load.</param>
/// <exception cref="FormatException">
/// The "individual-id" header lacks one of the Dbxref, First_name, Last_name or DOB fields,
/// the Dbxref value has no ':' separator, or a date value cannot be parsed.
/// </exception>
public override void LoadData(string filePath)
{
    var vcfParser = new VCFParser(filePath);
    var header = vcfParser.Header;
    var collectionInformationList = new List<patient_variant_information>();
    var patient = new patient();

    // We pull out all of the metadata from the header (all lines) and write them as information
    // lines associated with this result.
    foreach (var headerItem in header.MetaDataInInputOrder)
    {
        if (headerItem.Key == "individual-id")
        {
            // The value is a "<key=value,key=value,...>" list; strip the brackets and split it
            // into [key, value] pairs.
            var individualParts = headerItem.Value.Replace("<", "").Replace(">", "").Split(new char[] { ',' });
            var individualData = individualParts.Select(x => x.Split(new char[] { '=' })).ToArray();

            // Looks up a field by key and fails with a descriptive error (rather than a
            // NullReferenceException / IndexOutOfRangeException) when it is absent or has no value.
            Func<string, string> requiredField = name =>
            {
                var pair = individualData.FirstOrDefault(field => field[0] == name);
                if (pair == null || pair.Length < 2)
                {
                    throw new FormatException(string.Format(
                        "The VCF 'individual-id' header is missing the required '{0}' field.", name));
                }

                return pair[1];
            };

            // Dbxref is split on ':'; the second part is passed as the first AddPatient argument
            // and the first part as the second.
            var mrnParts = requiredField("Dbxref").Split(':');
            if (mrnParts.Length < 2)
            {
                throw new FormatException(
                    "The 'Dbxref' field of the VCF 'individual-id' header must contain two parts separated by ':'.");
            }

            // NOTE(review): DOB is parsed with the current culture - confirm the expected format.
            patient = patientRepo.AddPatient(
                mrnParts[1],
                mrnParts[0],
                requiredField("First_name"),
                requiredField("Last_name"),
                DateTime.Parse(requiredField("DOB")));
        }
        else if (headerItem.GetType() == typeof(VCFInfoHeaderLine))
        {
            // The exact-type check above guarantees the direct cast succeeds.
            var info = (VCFInfoHeaderLine)headerItem;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("INFO", info.ToString())));
        }
        else if (headerItem.GetType() == typeof(VCFFilterHeaderLine))
        {
            var filter = (VCFFilterHeaderLine)headerItem;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("FILTER", filter.ToString())));
        }
        else if (headerItem.GetType() == typeof(VCFFormatHeaderLine))
        {
            var format = (VCFFormatHeaderLine)headerItem;
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), CleanHeaderValue("FORMAT", format.ToString())));
        }
        else
        {
            collectionInformationList.Add(AddHeaderInformation(string.Format("VCF:{0}", headerItem.Key), headerItem.Value));
        }
    }

    // The "reference" header is required here: First() throws when the file does not declare it.
    var reference = header.MetaDataInInputOrder.First(x => x.Key == "reference").Value;

    // "fileDate" is treated as optional: when the header is absent the result date stays null
    // instead of aborting the whole load.
    DateTime? resultDate = null;
    var fileDateLine = header.MetaDataInInputOrder.FirstOrDefault(x => x.Key == "fileDate");
    if (fileDateLine != null)
    {
        resultDate = DateTime.ParseExact(fileDateLine.Value, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);
    }

    var patientVariants = new List<patient_variants>();
    var featureInformationList = new Dictionary<patient_variants, List<patient_variant_information>>();
    while (vcfParser.MoveNext())
    {
        var current = vcfParser.Current;
        var variant = variantRepo.AddVariant(null, current.ID, "dbSNP", current.Chr, current.Start, current.End, reference, current.Reference.BaseString);
        var patientVariant = new patient_variants()
        {
            patient_id = patient.id,
            reference_id = variant.id,
            resulted_on = resultDate,
            variant_type = Enums.PatientVariantType.SNP
        };
        SetVariantValues(patientVariant, current);
        patientVariants.Add(patientVariant);

        // Collect the information rows for this record; their item_id is filled in later,
        // once the patient variant has been written and has an id.
        var attributeList = new List<patient_variant_information>();
        foreach (var attribute in current.Attributes)
        {
            attributeList.Add(AddVariantInformation(string.Format("VCF:{0}", attribute.Key), attribute.Value.ToString()));
        }

        if (current.FiltersMaybeNull != null)
        {
            foreach (var filter in current.FiltersMaybeNull)
            {
                attributeList.Add(AddVariantInformation("VCF:Filter", filter));
            }
        }

        foreach (var genotype in current.Genotypes)
        {
            attributeList.Add(AddVariantInformation("VCF:Genotype", genotype.ToMHGRString()));
        }

        attributeList.Add(AddVariantInformation("VCF:Quality", current.PhredScaledQual.ToString()));
        // In addition to the per-filter rows above, one row carries all filters joined by ','.
        attributeList.Add(AddVariantInformation("VCF:Filter", string.Join(",", current.Filters.ToArray())));
        featureInformationList.Add(patientVariant, attributeList);
    }

    // Save the collection to get its ID
    var source = sourceRepo.AddSource("VCF", "VCF file");
    var file = AddResultFile(filePath, source);
    var collection = patientRepo.AddCollection(patient, file);

    // Save the collection-level header data
    collectionInformationList.ForEach(x => x.item_id = collection.id);
    variantRepo.AddPatientVariantInformationList(collectionInformationList);

    variantRepo.AddPatientVariants(patientVariants);

    // Save the individual attributes associated with each feature.
    // Must be done after the patient variants are written to DB (above), since we
    // rely on the ID being set.
    foreach (var pair in featureInformationList)
    {
        foreach (var attribute in pair.Value)
        {
            attribute.item_id = pair.Key.id;
        }

        variantRepo.AddPatientVariantInformationList(pair.Value);
    }

    variantRepo.AddPatientVariantsToCollection(collection, patientVariants);

    featureInformationList.Clear();
    collectionInformationList.Clear();
    patientVariants.Clear();
}
/// <summary>
/// Reads the VCF file at <paramref name="filePath"/> and converts it into a tree of
/// <c>result_entities</c>: one root entity for the file, child entities for the header lines,
/// and a "Variant Call Format variant" entity (with its attribute entities) per record.
/// The whole tree is handed to <c>entityRepo.AddVCF</c> at the end.
/// </summary>
/// <param name="filePath">Path of the VCF file to load.</param>
/// <exception cref="FormatException">
/// The "individual-id" header lacks one of the Dbxref, First_name, Last_name or DOB fields,
/// the Dbxref value has no ':' separator, or a date value cannot be parsed.
/// </exception>
public override void LoadData(string filePath)
{
    var vcfParser = new VCFParser(filePath);
    var header = vcfParser.Header;
    var patient = new patient();

    var source = sourceRepo.AddSource("VCF", "VCF file");
    var file = AddResultFile(filePath, source);

    // Process the file-level pragmas
    result_entities rootEntity = new result_entities()
    {
        attribute_id = EntityRepository.GetAttribute(null, null, "Variant Call Format result", null).id,
        result_file_id = file.id
    };

    // We pull out all of the metadata from the header (all lines) and write them as information
    // lines associated with this result. Header entities are created with 0 as the second
    // CreateEntityAttribute argument (presumably the patient id); patient_id is set on all of
    // them after the loop, once the patient is known.
    var headerEntities = new List<result_entities>();
    foreach (var headerItem in header.MetaDataInInputOrder)
    {
        if (headerItem.Key == "individual-id")
        {
            // The value is a "<key=value,key=value,...>" list; strip the brackets and split it
            // into [key, value] pairs.
            var individualParts = headerItem.Value.Replace("<", "").Replace(">", "").Split(new char[] { ',' });
            var individualData = individualParts.Select(x => x.Split(new char[] { '=' })).ToArray();

            // Looks up a field by key and fails with a descriptive error (rather than a
            // NullReferenceException / IndexOutOfRangeException) when it is absent or has no value.
            Func<string, string> requiredField = name =>
            {
                var pair = individualData.FirstOrDefault(field => field[0] == name);
                if (pair == null || pair.Length < 2)
                {
                    throw new FormatException(string.Format(
                        "The VCF 'individual-id' header is missing the required '{0}' field.", name));
                }

                return pair[1];
            };

            // Dbxref is split on ':'; the second part is passed as the first AddPatient argument
            // and the first part as the second.
            var mrnParts = requiredField("Dbxref").Split(':');
            if (mrnParts.Length < 2)
            {
                throw new FormatException(
                    "The 'Dbxref' field of the VCF 'individual-id' header must contain two parts separated by ':'.");
            }

            // NOTE(review): DOB is parsed with the current culture - confirm the expected format.
            patient = patientRepo.AddPatient(
                mrnParts[1],
                mrnParts[0],
                requiredField("First_name"),
                requiredField("Last_name"),
                DateTime.Parse(requiredField("DOB")));
        }
        else if (headerItem.Key == "fileDate")
        {
            DateTime resultDate = DateTime.ParseExact(headerItem.Value, "yyyyMMdd", CultureInfo.InvariantCulture, DateTimeStyles.None);
            // NOTE(review): ToShortDateString() formats with the current culture, so the stored
            // text depends on the machine's regional settings - confirm this is intended.
            headerEntities.Add(CreateEntityAttribute("Resulted on", 0, file.id, rootEntity, resultDate.ToShortDateString()));
        }
        else if (headerItem.GetType() == typeof(VCFInfoHeaderLine))
        {
            // The exact-type check above guarantees the direct cast succeeds.
            var info = (VCFInfoHeaderLine)headerItem;
            var infoEntity = CreateEntityAttribute("INFO", 0, file.id, rootEntity, null);
            headerEntities.Add(infoEntity);
            headerEntities.Add(CreateEntityAttribute("ID", 0, file.id, infoEntity, info.ID));
            headerEntities.Add(CreateEntityAttribute("Number", 0, file.id, infoEntity, info.CountType.ToString()));
            headerEntities.Add(CreateEntityAttribute("Type", 0, file.id, infoEntity, info.Type.ToString()));
            headerEntities.Add(CreateEntityAttribute("Description", 0, file.id, infoEntity, info.Description));
        }
        else if (headerItem.GetType() == typeof(VCFFilterHeaderLine))
        {
            var filter = (VCFFilterHeaderLine)headerItem;
            var filterEntity = CreateEntityAttribute("FILTER", 0, file.id, rootEntity, null);
            headerEntities.Add(filterEntity);
            foreach (var field in filter.GenericFields())
            {
                headerEntities.Add(CreateEntityAttribute(field.Key, 0, file.id, filterEntity, field.Value));
            }
        }
        else if (headerItem.GetType() == typeof(VCFFormatHeaderLine))
        {
            var format = (VCFFormatHeaderLine)headerItem;
            var formatEntity = CreateEntityAttribute("FORMAT", 0, file.id, rootEntity, null);
            headerEntities.Add(formatEntity);
            headerEntities.Add(CreateEntityAttribute("ID", 0, file.id, formatEntity, format.ID));
            headerEntities.Add(CreateEntityAttribute("Number", 0, file.id, formatEntity, format.CountType.ToString()));
            headerEntities.Add(CreateEntityAttribute("Type", 0, file.id, formatEntity, format.Type.ToString()));
            headerEntities.Add(CreateEntityAttribute("Description", 0, file.id, formatEntity, format.Description));
        }
        else
        {
            var headerEntity = CreateEntityAttribute(headerItem.Key, 0, file.id, rootEntity, headerItem.Value);
            headerEntities.Add(headerEntity);
        }
    }

    // The patient is only known after the header has been scanned, so attach it now.
    rootEntity.patient_id = patient.id;
    headerEntities.ForEach(x => x.patient_id = patient.id);

    var variantEntities = new List<result_entities>();
    while (vcfParser.MoveNext())
    {
        var current = vcfParser.Current;

        // Container entity for this record, parented to the file's root entity.
        result_entities variantEntity = new result_entities()
        {
            attribute_id = EntityRepository.GetAttribute(null, null, "Variant Call Format variant", null).id,
            result_file_id = file.id,
            patient_id = patient.id,
            parent = rootEntity
        };
        variantEntities.Add(variantEntity);

        // Entity whose attribute is looked up by the record's ID in the "dbSNP" source.
        result_entities snpEntity = new result_entities()
        {
            attribute_id = EntityRepository.GetAttribute(current.ID, "dbSNP", null, null).id,
            result_file_id = file.id,
            patient_id = patient.id,
            parent = variantEntity
        };
        variantEntities.Add(snpEntity);
        SetVariantValues(current, patient.id, file.id, snpEntity, variantEntities);

        variantEntities.Add(CreateEntityAttribute("Chromosome", patient.id, file.id, variantEntity, current.Chr));
        variantEntities.Add(CreateEntityAttribute("Start position", patient.id, file.id, variantEntity, current.Start.ToString()));
        variantEntities.Add(CreateEntityAttribute("End position", patient.id, file.id, variantEntity, current.End.ToString()));
        variantEntities.Add(CreateEntityAttribute("Reference base", patient.id, file.id, variantEntity, current.Reference.BaseString));
        variantEntities.Add(CreateEntityAttribute("Quality", patient.id, file.id, variantEntity, current.PhredScaledQual.ToString()));

        foreach (var attr in current.Attributes)
        {
            variantEntities.Add(CreateEntityAttribute(string.Format("INFO:{0}", attr.Key), patient.id, file.id, variantEntity, attr.Value.ToString()));
        }

        if (current.FiltersMaybeNull != null)
        {
            // Each filter becomes its own entity named "FILTER:<name>" with an empty value.
            foreach (var filter in current.FiltersMaybeNull)
            {
                variantEntities.Add(CreateEntityAttribute(string.Format("FILTER:{0}", filter), patient.id, file.id, variantEntity, string.Empty));
            }
        }

        // NOTE(review): current.Genotypes is not read in this method (the former per-genotype
        // import was disabled); unless SetVariantValues covers genotypes, they are not stored.
    }

    entityRepo.AddVCF(rootEntity, headerEntities, variantEntities);
}