/// <summary>
/// Sets up a filter for the VCF referenced by <paramref name="settings"/>: stores the input's
/// header lines, creates the geometric filter, stores the options returned by
/// <c>VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions</c>, and derives the output path.
/// </summary>
/// <param name="settings">
/// Options providing the input VCF path, the output directory and the geometric filter parameters.
/// </param>
public VcfFilter(PsaraOptions settings)
{
    var inputVcfPath = settings.VcfPath;
    var inputFileName = Path.GetFileName(inputVcfPath);

    _originalHeaderLines = AlleleReader.GetAllHeaderLines(inputVcfPath);
    _geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
    _psaraOptions = (PsaraOptions)VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(
        settings,
        _originalHeaderLines,
        inputVcfPath);

    // Insert ".filtered" ahead of ".vcf", then move it in front of ".genome" when that
    // leaves a ".genome.filtered.vcf" suffix, so the name ends in ".filtered.genome.vcf".
    var filteredPath = Path.Combine(
        settings.OutputDirectory,
        inputFileName.Replace(".vcf", ".filtered.vcf"));

    _outputFile = filteredPath.Replace(".genome.filtered.vcf", ".filtered.genome.vcf");
}
/// <summary>
/// Passes <paramref name="alleles"/> through <paramref name="filter"/> and hands the result to
/// <paramref name="writer"/>. The writer's buffer is flushed only when the write did not throw.
/// </summary>
/// <param name="alleles">Alleles to filter and write.</param>
/// <param name="writer">Destination VCF writer.</param>
/// <param name="filter">Filter applied before writing.</param>
private static void FilterAndStreamOut(List<CalledAllele> alleles, VcfFileWriter writer, GeometricFilter filter)
{
    List<CalledAllele> filteredAlleles = filter.DoFiltering(alleles);

    bool writeSucceeded;
    try
    {
        writer.Write(filteredAlleles);
        writeSucceeded = true;
    }
    catch (Exception writeError)
    {
        // A failed write is logged rather than rethrown; the flush below is skipped.
        Logger.WriteWarningToLog("Problem writing alleles to vcf.");
        Logger.WriteExceptionToLog(writeError);
        writeSucceeded = false;
    }

    // Kept outside the try block so that an exception from the flush itself still propagates.
    if (writeSucceeded)
    {
        writer.FlushBuffer();
    }
}
/// <summary>
/// Reads the VCF named by <c>settings.InputVcf</c>, runs each group of co-located alleles through
/// a <see cref="GeometricFilter"/>, and writes the result to a new VCF in
/// <c>settings.OutputDirectory</c>.
/// </summary>
/// <remarks>
/// The output file name is the input file name with ".vcf" replaced by ".filtered.vcf"
/// (and ".genome.filtered.vcf" rewritten to ".filtered.genome.vcf"). An existing file at that
/// path is deleted first. The output header is built from the input header lines plus a
/// "##Psara_cmdline=" line.
/// </remarks>
/// <param name="settings">
/// Options providing the input VCF path, output directory, geometric filter parameters and the
/// quoted command line string.
/// </param>
public static void DoFiltering(PsaraOptions settings)
{
    // maybe expand to add other filters..
    var geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);

    var vcfIn = settings.InputVcf;
    var vcfName = Path.GetFileName(vcfIn);

    // NOTE(review): if vcfName contains no ".vcf" substring, Replace leaves it unchanged and the
    // output name equals the input name. If OutputDirectory is also the input's directory, the
    // File.Delete below would then target the input file - confirm this is validated upstream.
    var outputFile = Path.Combine(settings.OutputDirectory, vcfName.Replace(".vcf", ".filtered.vcf"));
    outputFile = outputFile.Replace(".genome.filtered.vcf", ".filtered.genome.vcf");

    Logger.WriteToLog("filtering " + vcfIn + "...");

    if (File.Exists(outputFile))
    {
        File.Delete(outputFile);
    }

    List<string> header = VcfReader.GetAllHeaderLines(vcfIn);
    string cmdLine = "##Psara_cmdline=" + settings.QuotedCommandLineArgumentsString;
    VcfWriterConfig config = GetWriterConfigToMatchInputVcf(vcfIn);

    using (PsaraVcfWriter writer = new PsaraVcfWriter(outputFile, config, new VcfWriterInputContext(), header, cmdLine))
    {
        writer.WriteHeader();

        using (VcfReader reader = new VcfReader(vcfIn, false))
        {
            // Holds the most recently read variant that has not yet been converted.
            var backLogVcfVariant = new VcfVariant();
            var coLocatedAlleles = new List<CalledAllele>();
            var incomingBatch = new List<CalledAllele>();
            var moreVariantsInVcf = reader.GetNextVariant(backLogVcfVariant);

            while (moreVariantsInVcf)
            {
                if (incomingBatch.Count == 0)
                {
                    // moreVariantsInVcf is always true here (it is the loop condition), so the
                    // backlog variant is converted unconditionally. The former "? ... : null"
                    // fallback was unreachable and has been removed.
                    incomingBatch = VcfVariantUtilities.Convert(
                        new List<VcfVariant> { backLogVcfVariant },
                        config.ShouldOutputRcCounts,
                        config.ShouldOutputTsCounts,
                        false).ToList();

                    // Read ahead so the next empty-batch iteration has a variant waiting.
                    moreVariantsInVcf = reader.GetNextVariant(backLogVcfVariant);
                }

                if ((coLocatedAlleles.Count == 0) || AreColocated(coLocatedAlleles, incomingBatch))
                {
                    // The batch joins the current group.
                    // colocated alleles are left behind
                    coLocatedAlleles.AddRange(incomingBatch);
                    incomingBatch.Clear();
                }
                else
                {
                    // The batch starts a new group: write out the current group first.
                    // incomingBatch alleles are left behind
                    FilterAndStreamOut(coLocatedAlleles, writer, geometricFilter);
                    coLocatedAlleles.Clear();
                }
            }

            // if you get here, there is no more unprocessed vcf variants but there could be
            // colocated or an incoming batch of alleles left over. We need to write them to file before exiting.
            FilterAndStreamOut(coLocatedAlleles, writer, geometricFilter);
            FilterAndStreamOut(incomingBatch, writer, geometricFilter);
        }
    }
}
/// <summary>
/// Produces the replacement for a group of alleles by running them through
/// <paramref name="filter"/>.
/// </summary>
/// <param name="appOptions">Not read by this method.</param>
/// <param name="filter">Filter whose <c>DoFiltering</c> result becomes the output.</param>
/// <param name="inAlleles">Alleles to filter.</param>
/// <param name="outAlleles">Receives the list returned by the filter.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>.</returns>
public static TypeOfUpdateNeeded UpdateColocatedAlleles(VcfConsumerAppOptions appOptions, GeometricFilter filter, List<CalledAllele> inAlleles, out List<CalledAllele> outAlleles)
{
    List<CalledAllele> filteredAlleles = filter.DoFiltering(inAlleles);
    outAlleles = filteredAlleles;

    return TypeOfUpdateNeeded.Modify;
}