Example #1
0
        /// <summary>
        /// Opens the vcf at <paramref name="vcfPath"/> with an <see cref="AlleleReader"/> and returns the reader's header lines.
        /// </summary>
        /// <param name="vcfPath">Path handed to the <see cref="AlleleReader"/> constructor.</param>
        /// <returns>The <c>HeaderLines</c> list exposed by the reader.</returns>
        public static List<string> GetAllHeaderLines(string vcfPath)
        {
            using (var vcfReader = new AlleleReader(vcfPath))
            {
                // The property is read first; the reader is disposed on the way out of the using block.
                return vcfReader.HeaderLines;
            }
        }
Example #2
0
        /// <summary>
        /// Reads every variant from the vcf at <paramref name="vcfPath"/> into a single in-memory list.
        /// This is memory-intensive; don't do this for whole-genome vcf files!
        /// </summary>
        /// <param name="vcfPath">Path handed to the <see cref="AlleleReader"/> constructor.</param>
        /// <returns>All alleles yielded by the reader's <c>GetVariants()</c>, in the order they were yielded.</returns>
        public static List<CalledAllele> GetAllVariantsInFile(string vcfPath)
        {
            var loadedVariants = new List<CalledAllele>();

            using (var vcfReader = new AlleleReader(vcfPath))
            {
                // Drain the reader's sequence completely before the reader is disposed.
                loadedVariants.AddRange(vcfReader.GetVariants());
            }

            return loadedVariants;
        }
Example #3
0
        /// <summary>
        /// Converts every variant yielded by <paramref name="reader"/> into a candidate allele and groups the
        /// candidates that pass the filters by their chromosome. Candidates of type
        /// <c>AlleleCategory.Unsupported</c> are always dropped.
        /// </summary>
        /// <param name="reader">Reader whose <c>GetVariants()</c> output is converted and grouped.</param>
        /// <param name="variantsOnly">When true, candidates of type <c>AlleleCategory.Reference</c> are left out.</param>
        /// <param name="flagIsKnown">When true, <c>IsKnown</c> is set to true on every candidate that is kept.</param>
        /// <param name="typeFilter">Optional. When supplied, only candidates whose type is contained in this list are kept.</param>
        /// <param name="doSkipCandidate">Optional predicate. A candidate is left out when it returns true.</param>
        /// <returns>A dictionary keyed by chromosome; each value holds the kept candidates in the order they were read.</returns>
        public static Dictionary<string, List<CandidateAllele>> GetVariantsByChromosome(this AlleleReader reader,
                                                                                         bool variantsOnly = false, bool flagIsKnown = false, List<AlleleCategory> typeFilter = null, Func<CandidateAllele, bool> doSkipCandidate = null)
        {
            var lookup = new Dictionary<string, List<CandidateAllele>>();

            foreach (var calledVariant in reader.GetVariants())
            {
                var candidate = BackToCandiate(calledVariant);

                // The filters run in a fixed order so that doSkipCandidate is only ever invoked
                // on candidates that already passed the cheaper type-based checks.
                if (candidate.Type == AlleleCategory.Unsupported)
                {
                    continue;
                }

                if (variantsOnly && candidate.Type == AlleleCategory.Reference)
                {
                    continue;
                }

                if (typeFilter != null && !typeFilter.Contains(candidate.Type))
                {
                    continue;
                }

                if (doSkipCandidate != null && doSkipCandidate(candidate))
                {
                    continue;
                }

                if (flagIsKnown)
                {
                    candidate.IsKnown = true;
                }

                // Single hash lookup per candidate instead of ContainsKey followed by two indexer accesses.
                var chromosome = candidate.Chromosome;
                List<CandidateAllele> chromosomeCandidates;
                if (!lookup.TryGetValue(chromosome, out chromosomeCandidates))
                {
                    chromosomeCandidates = new List<CandidateAllele>();
                    lookup.Add(chromosome, chromosomeCandidates);
                }

                chromosomeCandidates.Add(candidate);
            }

            return lookup;
        }
Example #4
0
        /// <summary>
        /// Take in a vcf, do stuff to it, write out a vcf. The input is consumed one group of co-located
        /// vcf lines at a time rather than loaded whole, so as not to blow up your computer.
        /// For each group, <paramref name="canSkipLinesWithoutProcessing"/> decides whether the lines are
        /// written through untouched, converted to alleles and modified, or left out of the output.
        /// </summary>
        /// <param name="vcfOut">the output file name</param>
        /// <param name="options">all the parameters associated with writing out a vcf; <c>options.VcfPath</c> is the input that gets read</param>
        /// <param name="shouldTrimComplexAlleles">if ACGT-> ACCT is ok, or if you want it trimmed to G -> C. This might affect position and ordering. Generally turn it OFF for processing vcfs, post scylla.</param>
        /// <param name="recalibrationData">the data you need for doing your "stuff"</param>
        /// <param name="whatToDoWithSingleAllele">how you want to change each allele</param>
        /// <param name="whatToDoWithCoLocatedAlleles">how you want to change each set of alleles, by loci</param>
        /// <param name="canSkipLinesWithoutProcessing">when you can skip lines (saves CPU time)</param>
        /// <param name="getVcfFileWriter">what your special vcf writer should be, includes special header lines, etc</param>
        private static void UpdateVcf(string vcfOut, VcfConsumerAppOptions options, bool shouldTrimComplexAlleles, T recalibrationData,
                                      UpdateSingleAlleleMethod whatToDoWithSingleAllele, UpdateCoLocatedAllelesMethod whatToDoWithCoLocatedAlleles,
                                      CanSkipVcfLinesMethod canSkipLinesWithoutProcessing, GetVcfFileWriter getVcfFileWriter)
        {
            using (var reader = new AlleleReader(options.VcfPath, shouldTrimComplexAlleles))
            using (VcfFileWriter writer = getVcfFileWriter(options, vcfOut))
            {
                writer.WriteHeader();
                writer.FlushBuffer();

                // Line handed back by the previous CloseColocatedLines call; it is fed into the next call.
                string carriedOverLine = null;

                while (true)
                {
                    // Get the next group to process.
                    string nextCarriedOverLine;
                    var coLocatedLines = reader.CloseColocatedLines(carriedOverLine, out nextCarriedOverLine);
                    carriedOverLine = nextCarriedOverLine;

                    // An empty group is how we know we are done.
                    if (coLocatedLines.Count == 0)
                    {
                        break;
                    }

                    TypeOfUpdateNeeded updateKind = canSkipLinesWithoutProcessing(coLocatedLines);

                    if (updateKind == TypeOfUpdateNeeded.NoChangeNeeded)
                    {
                        // Pass the original lines straight through.
                        writer.Write(coLocatedLines);
                        continue;
                    }

                    if (updateKind != TypeOfUpdateNeeded.Modify)
                    {
                        // DeleteCompletely, or any other value: nothing is written for this group.
                        continue;
                    }

                    // Modify: change the lines into alleles and do stuff to them.
                    List<CalledAllele> allelesAtLocus = AlleleReader.VcfLinesToAlleles(coLocatedLines);
                    bool locusWasUpdated = false;
                    List<CalledAllele> modifiedAlleles = WhatToDoToAlleles(options, recalibrationData,
                                                                           whatToDoWithSingleAllele, whatToDoWithCoLocatedAlleles, allelesAtLocus, ref locusWasUpdated);

                    if (locusWasUpdated)
                    {
                        writer.Write(modifiedAlleles);
                    }
                    else
                    {
                        // No update was flagged, so keep the original text of the lines.
                        writer.Write(coLocatedLines);
                    }
                }
            }
        }