Beispiel #1
0
        /// <summary>
        /// Sets up a filter for the VCF named in <paramref name="settings"/>: captures the header
        /// lines of the input VCF, creates the geometric filter, stores the options returned by
        /// <c>TryToUpdateWithOriginalOptions</c>, and derives the path of the filtered output VCF.
        /// </summary>
        /// <param name="settings">
        /// Options supplying the input VCF path, the output directory and the geometric filter parameters.
        /// </param>
        public VcfFilter(PsaraOptions settings)
        {
            var inputVcfPath = settings.VcfPath;
            var inputVcfName = Path.GetFileName(inputVcfPath);

            // Order matters here: the header lines feed the options update below, and the
            // geometric filter is built from the settings as they were passed in.
            _originalHeaderLines = AlleleReader.GetAllHeaderLines(inputVcfPath);
            _geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
            _psaraOptions = (PsaraOptions)VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(settings, _originalHeaderLines, inputVcfPath);

            // "x.vcf" becomes "x.filtered.vcf"; the second replace then turns
            // "x.genome.filtered.vcf" into "x.filtered.genome.vcf".
            var outputDirectory = settings.OutputDirectory;
            var filteredName = inputVcfName.Replace(".vcf", ".filtered.vcf");
            _outputFile = Path.Combine(outputDirectory, filteredName)
                              .Replace(".genome.filtered.vcf", ".filtered.genome.vcf");
        }
Beispiel #2
0
        /// <summary>
        /// Runs <paramref name="alleles"/> through <paramref name="filter"/> and writes the result
        /// with <paramref name="writer"/>. If the write throws, the problem is logged and the
        /// method returns without flushing; otherwise the writer's buffer is flushed.
        /// </summary>
        /// <param name="alleles">Alleles to filter and write.</param>
        /// <param name="writer">Destination VCF writer.</param>
        /// <param name="filter">Filter applied before writing.</param>
        private static void FilterAndStreamOut(List<CalledAllele> alleles, VcfFileWriter writer, GeometricFilter filter)
        {
            var filteredAlleles = filter.DoFiltering(alleles);
            var writeSucceeded = false;

            try
            {
                writer.Write(filteredAlleles);
                writeSucceeded = true;
            }
            catch (Exception ex)
            {
                // Best effort: a failed write is reported but does not abort the caller.
                Logger.WriteWarningToLog("Problem writing alleles to vcf.");
                Logger.WriteExceptionToLog(ex);
            }

            // The flush stays outside the try block so that its own failures still propagate.
            if (writeSucceeded)
            {
                writer.FlushBuffer();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Filters the input VCF named in <paramref name="settings"/> and writes the result to a
        /// "*.filtered.vcf" file in the output directory, deleting any existing file of that name
        /// first. Records are accumulated into a group for as long as <c>AreColocated</c> accepts
        /// them; each completed group is filtered and written as one unit.
        /// </summary>
        /// <param name="settings">
        /// Options supplying the input VCF path, output directory, geometric filter parameters
        /// and the command line string recorded in the output header.
        /// </param>
        public static void DoFiltering(PsaraOptions settings)
        {
            var geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
            //maybe expand to add other filters..

            var inputVcfPath = settings.InputVcf;
            var inputVcfName = Path.GetFileName(inputVcfPath);

            // "x.vcf" becomes "x.filtered.vcf"; the second replace then turns
            // "x.genome.filtered.vcf" into "x.filtered.genome.vcf".
            var outputDirectory = settings.OutputDirectory;
            var filteredName = inputVcfName.Replace(".vcf", ".filtered.vcf");
            var outputPath = Path.Combine(outputDirectory, filteredName)
                                 .Replace(".genome.filtered.vcf", ".filtered.genome.vcf");

            Logger.WriteToLog("filtering " + inputVcfPath + "...");

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            List<string> headerLines = VcfReader.GetAllHeaderLines(inputVcfPath);
            string commandLineHeader = "##Psara_cmdline=" + settings.QuotedCommandLineArgumentsString;
            VcfWriterConfig writerConfig = GetWriterConfigToMatchInputVcf(inputVcfPath);

            using (var writer = new PsaraVcfWriter(outputPath, writerConfig, new VcfWriterInputContext(), headerLines, commandLineHeader))
            {
                // The header is written before the reader is opened.
                writer.WriteHeader();

                using (var reader = new VcfReader(inputVcfPath, false))
                {
                    // nextRecord is a single instance that the reader refills on every read.
                    var nextRecord = new VcfVariant();
                    var currentGroup = new List<CalledAllele>();
                    var hasUnconvertedRecord = reader.GetNextVariant(nextRecord);
                    var unplacedAlleles = new List<CalledAllele>();

                    while (hasUnconvertedRecord)
                    {
                        if (unplacedAlleles.Count == 0)
                        {
                            // Convert the record in hand, then read ahead so the loop condition
                            // reflects whether another record is waiting.
                            var recordsToConvert = new List<VcfVariant> { nextRecord };
                            unplacedAlleles = VcfVariantUtilities.Convert(
                                recordsToConvert,
                                writerConfig.ShouldOutputRcCounts,
                                writerConfig.ShouldOutputTsCounts,
                                false).ToList();
                            hasUnconvertedRecord = reader.GetNextVariant(nextRecord);
                        }

                        var startsNewGroup = (currentGroup.Count != 0) && !AreColocated(currentGroup, unplacedAlleles);

                        if (startsNewGroup)
                        {
                            // Emit the finished group; the unplaced alleles stay behind and
                            // seed the next group on the following pass.
                            FilterAndStreamOut(currentGroup, writer, geometricFilter);
                            currentGroup.Clear();
                        }
                        else
                        {
                            // Absorb the unplaced alleles; the group itself stays behind.
                            currentGroup.AddRange(unplacedAlleles);
                            unplacedAlleles.Clear();
                        }
                    }

                    // The reader is exhausted, but a group and/or a batch of unplaced alleles
                    // may still be pending. Both are written out before exiting.
                    FilterAndStreamOut(currentGroup, writer, geometricFilter);

                    FilterAndStreamOut(unplacedAlleles, writer, geometricFilter);
                }
            }
        }
Beispiel #4
0
 /// <summary>
 /// Applies <paramref name="filter"/> to <paramref name="inAlleles"/> and hands the result back
 /// through <paramref name="outAlleles"/>. Always reports <c>TypeOfUpdateNeeded.Modify</c>.
 /// </summary>
 /// <param name="appOptions">Not used by this implementation.</param>
 /// <param name="filter">Filter applied to the incoming alleles.</param>
 /// <param name="inAlleles">Alleles to filter.</param>
 /// <param name="outAlleles">Receives whatever <c>filter.DoFiltering</c> returns.</param>
 /// <returns><c>TypeOfUpdateNeeded.Modify</c>, unconditionally.</returns>
 public static TypeOfUpdateNeeded UpdateColocatedAlleles(VcfConsumerAppOptions appOptions, GeometricFilter filter, List<CalledAllele> inAlleles, out List<CalledAllele> outAlleles)
 {
     var filteredAlleles = filter.DoFiltering(inAlleles);
     outAlleles = filteredAlleles;

     return TypeOfUpdateNeeded.Modify;
 }