/// <summary>
/// Reads the header lines of a vcf file.
/// </summary>
/// <param name="vcfPath">Path of the vcf file handed to the <see cref="AlleleReader"/> constructor.</param>
/// <returns>The reader's <c>HeaderLines</c> value, fetched before the reader is disposed.</returns>
public static List<string> GetAllHeaderLines(string vcfPath)
{
    using (var vcfReader = new AlleleReader(vcfPath))
    {
        // The return value is evaluated first; the reader is disposed on the way out.
        return vcfReader.HeaderLines;
    }
}
/// <summary>
/// Load a list of all variants in a file.
/// This is memory-intensive; don't do this for whole-genome vcf files!
/// </summary>
/// <param name="vcfPath">Path of the vcf file handed to the <see cref="AlleleReader"/> constructor.</param>
/// <returns>Every allele produced by the reader's <c>GetVariants()</c>, in the order it yields them.</returns>
public static List<CalledAllele> GetAllVariantsInFile(string vcfPath)
{
    var collected = new List<CalledAllele>();

    using (var vcfReader = new AlleleReader(vcfPath))
    {
        // Drain the whole sequence while the reader is still open.
        collected.AddRange(vcfReader.GetVariants());
    }

    return collected;
}
/// <summary>
/// Converts every variant the reader yields back into a candidate allele and groups the
/// candidates that survive filtering by chromosome.
/// </summary>
/// <param name="reader">Reader whose <c>GetVariants()</c> sequence is consumed.</param>
/// <param name="variantsOnly">When true, candidates of type <c>AlleleCategory.Reference</c> are dropped.</param>
/// <param name="flagIsKnown">When true, <c>IsKnown</c> is set to true on every candidate that is kept.</param>
/// <param name="typeFilter">Optional list of allele categories to keep; null keeps every category.</param>
/// <param name="doSkipCandidate">Optional predicate; a candidate for which it returns true is dropped.</param>
/// <returns>
/// A map from chromosome to the kept candidates, in the order the reader produced them.
/// Candidates of type <c>AlleleCategory.Unsupported</c> are never included, and a chromosome
/// with no kept candidate has no entry.
/// </returns>
public static Dictionary<string, List<CandidateAllele>> GetVariantsByChromosome(this AlleleReader reader, bool variantsOnly = false, bool flagIsKnown = false, List<AlleleCategory> typeFilter = null, Func<CandidateAllele, bool> doSkipCandidate = null)
{
    var lookup = new Dictionary<string, List<CandidateAllele>>();

    foreach (var calledVariant in reader.GetVariants())
    {
        var candidate = BackToCandiate(calledVariant);

        // Unsupported alleles are always discarded, regardless of the other options.
        if (candidate.Type == AlleleCategory.Unsupported)
        {
            continue;
        }

        if (variantsOnly && candidate.Type == AlleleCategory.Reference)
        {
            continue;
        }

        if (typeFilter != null && !typeFilter.Contains(candidate.Type))
        {
            continue;
        }

        if (doSkipCandidate != null && doSkipCandidate(candidate))
        {
            continue;
        }

        if (flagIsKnown)
        {
            candidate.IsKnown = true;
        }

        // Single lookup per candidate: fetch the chromosome's bucket, creating it on first use.
        var chromosome = candidate.Chromosome;
        List<CandidateAllele> chromosomeCandidates;
        if (!lookup.TryGetValue(chromosome, out chromosomeCandidates))
        {
            chromosomeCandidates = new List<CandidateAllele>();
            lookup[chromosome] = chromosomeCandidates;
        }

        chromosomeCandidates.Add(candidate);
    }

    return lookup;
}
/// <summary>
/// Take in a vcf, do stuff to it, write out a vcf. The input is consumed one group of
/// co-located lines at a time (not loaded whole), so as not to blow up your computer.
/// The header is written and flushed first; then, for each group, the skip-check decides
/// whether the lines are written unchanged, converted to alleles and modified, or dropped.
/// </summary>
/// <param name="vcfOut">the output file name</param>
/// <param name="options">all the parameters associated with writing out a vcf; <c>options.VcfPath</c> is the input vcf</param>
/// <param name="shouldTrimComplexAlleles">if ACGT -> ACCT is ok, or if you want it trimmed to G -> C. This might affect position and ordering. Generally turn it OFF for processing vcfs, post scylla.</param>
/// <param name="recalibrationData">the data you need for doing your "stuff"; passed through to the allele-update step</param>
/// <param name="whatToDoWithSingleAllele">how you want to change each allele</param>
/// <param name="whatToDoWithCoLocatedAlleles">how you want to change each set of alleles, by loci</param>
/// <param name="canSkipLinesWithoutProcessing">classifies each group of co-located lines: no change needed, modify, or delete (skipping saves CPU time)</param>
/// <param name="getVcfFileWriter">what your special vcf writer should be, includes special header lines, etc</param>
private static void UpdateVcf(string vcfOut, VcfConsumerAppOptions options, bool shouldTrimComplexAlleles, T recalibrationData, UpdateSingleAlleleMethod whatToDoWithSingleAllele, UpdateCoLocatedAllelesMethod whatToDoWithCoLocatedAlleles, CanSkipVcfLinesMethod canSkipLinesWithoutProcessing, GetVcfFileWriter getVcfFileWriter)
{
    using (AlleleReader vcfReader = new AlleleReader(options.VcfPath, shouldTrimComplexAlleles))
    using (VcfFileWriter vcfWriter = getVcfFileWriter(options, vcfOut))
    {
        vcfWriter.WriteHeader();
        vcfWriter.FlushBuffer();

        // Line handed back by the previous CloseColocatedLines call; it is fed into the next call.
        string carriedOverLine = null;

        for (;;)
        {
            // Get the next group to process.
            string nextCarriedOverLine;
            var locusLines = vcfReader.CloseColocatedLines(carriedOverLine, out nextCarriedOverLine);
            carriedOverLine = nextCarriedOverLine;

            // An empty group is how we know we are done.
            if (locusLines.Count == 0)
            {
                break;
            }

            TypeOfUpdateNeeded verdict = canSkipLinesWithoutProcessing(locusLines);

            if (verdict == TypeOfUpdateNeeded.NoChangeNeeded)
            {
                // Pass the original lines straight through.
                vcfWriter.Write(locusLines);
            }
            else if (verdict == TypeOfUpdateNeeded.Modify)
            {
                // Turn the lines into alleles and do stuff to them.
                bool locusWasChanged = false;
                List<CalledAllele> parsedAlleles = AlleleReader.VcfLinesToAlleles(locusLines);
                List<CalledAllele> rewrittenAlleles = WhatToDoToAlleles(options, recalibrationData, whatToDoWithSingleAllele, whatToDoWithCoLocatedAlleles, parsedAlleles, ref locusWasChanged);

                if (locusWasChanged)
                {
                    vcfWriter.Write(rewrittenAlleles);
                }
                else
                {
                    // Nothing actually changed, so keep the original text of the lines.
                    vcfWriter.Write(locusLines);
                }
            }

            // Any other verdict (including DeleteCompletely): the group is not written.
        }
    }
}