Exemple #1
0
        /// <summary>
        /// Gathers every allele that sits at the position of <paramref name="CurrentVariant"/>.
        /// The pending backlog (if any) is consumed first, after which records are pulled from
        /// <paramref name="Reader"/> until one is found that orders after the current position
        /// or the reader reports that no further variant is available.
        /// </summary>
        /// <param name="Reader">Vcf reader that is advanced while collecting alleles.</param>
        /// <param name="CurrentVariant">Allele whose position is being collected; passed as the first argument to the locus comparison.</param>
        /// <param name="mFirst">Forwarded unchanged as the third argument of AlleleCompareByLoci.OrderAlleles.</param>
        /// <param name="BackLogExists">
        /// In: true when <paramref name="TheBackLog"/> holds alleles that were read but not yet consumed.
        /// Out: true when a later-ordered record was read and stored in the backlog.
        /// </param>
        /// <param name="TheBackLog">
        /// In: the alleles read previously. Out: the alleles read past the current position,
        /// or null when no backlog remains.
        /// </param>
        /// <returns>The alleles found at the current position (possibly empty).</returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when the locus comparison yields anything other than 0 or -1.
        /// </exception>
        private static List <CalledAllele> AssembleColocatedList(
            VcfReader Reader, CalledAllele CurrentVariant, bool mFirst,
            ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
        {
            var colocated = new List<CalledAllele>();

            for (;;)
            {
                List<CalledAllele> candidates;

                if (BackLogExists)
                {
                    // Consume the backlog before touching the reader again.
                    candidates    = TheBackLog;
                    BackLogExists = false;
                }
                else
                {
                    var record = new VcfVariant();

                    if (!Reader.GetNextVariant(record))
                    {
                        // The reader has nothing more to give.
                        break;
                    }

                    candidates = VcfVariantUtilities.Convert(new List<VcfVariant> { record }).ToList();
                }

                // 0 => same position as CurrentVariant; -1 => CurrentVariant comes first.
                int order = AlleleCompareByLoci.OrderAlleles(CurrentVariant, candidates.First(), mFirst);

                if (order == 0)
                {
                    // Same position: keep it and look at the next record.
                    colocated.AddRange(candidates);
                    continue;
                }

                if (order != -1)
                {
                    throw new InvalidDataException("Vcf needs to be ordered.");
                }

                // The record lies beyond the current position: park it for the next call and stop.
                TheBackLog    = candidates;
                BackLogExists = true;
                break;
            }

            if (!BackLogExists)
            {
                TheBackLog = null;
            }

            return colocated;
        }
        /// <summary>
        /// Checks the sign returned by AlleleCompareByLoci.OrderAlleles for pairs of dummy alleles
        /// on numeric, chrX, chrM and nonstandard chromosomes, with the chrM-priority option
        /// (third argument) both on and off.
        /// </summary>
        public void OrderVariants()
        {
            var chr10           = TestHelper.CreateDummyAllele("chr10", 123, "A", "C", 1000, 156);
            var chrX            = TestHelper.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156);
            var chrXSecond      = TestHelper.CreateDummyAllele("chrX", 124, "A", "C", 1000, 156);
            var chrM            = TestHelper.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156);
            var chrMSecond      = TestHelper.CreateDummyAllele("chrM", 124, "A", "C", 1000, 156);
            var chr9            = TestHelper.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156);
            var chr9Second      = TestHelper.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156);
            var nonstandardChrZ = TestHelper.CreateDummyAllele("chrZ", 123, "A", "C", 1000, 156);
            var nonstandardChrA = TestHelper.CreateDummyAllele("chrA", 123, "A", "C", 1000, 156);

            // ---------------------------------------------------------------------------
            // When neither or both is on chrM, shouldn't matter if we set option to prioritize chrM
            // ---------------------------------------------------------------------------

            // Same chrom, different positions - numeric chrom
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chr9Second, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chr9Second, false));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chr9Second, chr9, true));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chr9Second, chr9, false));

            // Same chrom, different positions - chrX
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrX, chrXSecond, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrX, chrXSecond, false));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrXSecond, chrX, true));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrXSecond, chrX, false));

            // Same chrom, different positions - chrM
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrM, chrMSecond, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrM, chrMSecond, false));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrMSecond, chrM, true));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrMSecond, chrM, false));

            // Different chroms, one is >=10 (direct string compare would not sort these chroms correctly)
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chr10, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chr10, false));

            // One numeric, one chrX
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chrX, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chrX, false));

            // Same chrom, same position
            Assert.Equal(0, AlleleCompareByLoci.OrderAlleles(chr9, chr9, true));
            Assert.Equal(0, AlleleCompareByLoci.OrderAlleles(chrX, chrX, true));
            Assert.Equal(0, AlleleCompareByLoci.OrderAlleles(chrM, chrM, true));


            // ---------------------------------------------------------------------------
            // If one is on chrM, option to prioritize chrM matters
            // ---------------------------------------------------------------------------

            // One numeric, one chrM
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chr9, chrM, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, chrM, false));

            // One chrX, one chrM
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrX, chrM, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrX, chrM, false));

            // ---------------------------------------------------------------------------
            // Nonstandard chroms should be below numerics and then ordered alphabetically
            // ---------------------------------------------------------------------------

            // One numeric, one weird
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, nonstandardChrZ, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, nonstandardChrZ, false));

            // One chrX, one weird
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrX, nonstandardChrZ, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrX, nonstandardChrZ, false));

            // One chrM, one weird
            // With chrM prioritized, chrM sorts ahead of chr9 and chr9 sorts ahead of chrZ (both
            // asserted above), so a consistent ordering has to put chrM ahead of chrZ as well.
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrM, nonstandardChrZ, true));
            // NOTE(review): the expectation for (chrM, chrZ, false) cannot be derived from the other
            // assertions in this test; add it once confirmed against AlleleCompareByLoci.

            // One numeric, one funny
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, nonstandardChrA, true));
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chr9, nonstandardChrA, false));

            // One chrX, one funny
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrX, nonstandardChrA, true));
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrX, nonstandardChrA, false));

            // One chrM, one funny
            // Prioritized: chrM is ahead of chr9, which is ahead of chrA, hence chrM is ahead of chrA.
            Assert.Equal(-1, AlleleCompareByLoci.OrderAlleles(chrM, nonstandardChrA, true));
            // Not prioritized: chrA is ahead of chrX and chrX is ahead of chrM (both asserted above),
            // hence chrA is ahead of chrM.
            Assert.Equal(1, AlleleCompareByLoci.OrderAlleles(chrM, nonstandardChrA, false));
        }
Exemple #3
0
        /// <summary>
        /// Performs a Venn split between the two sample vcfs given by the first two input paths.
        /// Both files are walked in parallel, position by position: the co-located alleles of each
        /// file are paired up, each pair is written to the Venn output files and, when a consensus
        /// builder is configured, combined into a consensus call that is written to the consensus file.
        /// </summary>
        /// <param name="mFirst">
        /// Forwarded unchanged as the third argument of AlleleCompareByLoci.OrderAlleles whenever two
        /// loci are ordered (presumably whether chrM sorts ahead of the other chromosomes - confirm
        /// against that method).
        /// </param>
        /// <remarks>
        /// Any exception raised while processing a position is reported through OnError together with
        /// the chromosome and position being processed, and is then rethrown.
        /// </remarks>
        public void DoPairwiseVenn(bool mFirst)
        {
            bool doConsensus      = (consensusBuilder != null);
            bool requireGenotypes = false;

            using (VcfReader ReaderA = new VcfReader(_inputPaths[0], requireGenotypes))
            using (VcfReader ReaderB = new VcfReader(_inputPaths[1], requireGenotypes))
            {
                if (doConsensus)
                {
                    consensusBuilder.OpenConsensusFile(ReaderA.HeaderLines);
                }

                OpenVennDiagramStreams(ReaderA.HeaderLines);

                //read the first variant from each gvcf file...
                var currentAllele          = new CalledAllele();
                var backLogPoolAVcfVariant = new VcfVariant();
                var backLogPoolBVcfVariant = new VcfVariant();

                var backLogExistPoolA = ReaderA.GetNextVariant(backLogPoolAVcfVariant);
                var backLogExistPoolB = ReaderB.GetNextVariant(backLogPoolBVcfVariant);

                var backLogPoolAAlleles = backLogExistPoolA ? VcfVariantUtilities.Convert(new List <VcfVariant> {
                    backLogPoolAVcfVariant
                }).ToList() : null;
                var backLogPoolBAlleles = backLogExistPoolB ? VcfVariantUtilities.Convert(new List <VcfVariant> {
                    backLogPoolBVcfVariant
                }).ToList() : null;

                //keep reading and processing until we are done with both gvcfs
                while (true)
                {
                    try
                    {
                        //1) Get the next set of variants. Pull from the backlog first,
                        //choosing all the variants at the first available position.
                        var coLocatedPoolAAlleles = new List <CalledAllele>();
                        var coLocatedPoolBAlleles = new List <CalledAllele>();

                        //We need to set up which location to look at next.
                        //Choose the first one from the backlog.
                        if (backLogExistPoolA || backLogExistPoolB)
                        {
                            CalledAllele leadAllele;

                            if (backLogExistPoolA && backLogExistPoolB)
                            {
                                var firstA = backLogPoolAAlleles.First();
                                var firstB = backLogPoolBAlleles.First();

                                //pool A leads only when it is strictly first; ties go to pool B.
                                leadAllele = AlleleCompareByLoci.OrderAlleles(firstA, firstB, mFirst) < 0
                                    ? firstA
                                    : firstB;
                            }
                            else if (backLogExistPoolB)
                            {
                                leadAllele = backLogPoolBAlleles.First();
                            }
                            else //only pool A has a backlog
                            {
                                leadAllele = backLogPoolAAlleles.First();
                            }

                            currentAllele.Chromosome        = leadAllele.Chromosome;
                            currentAllele.ReferencePosition = leadAllele.ReferencePosition;

                            //assemble lists of co-located variants at the position of the current variant
                            coLocatedPoolAAlleles = AssembleColocatedList(ReaderA, currentAllele, mFirst,
                                                                          ref backLogExistPoolA, ref backLogPoolAAlleles);

                            coLocatedPoolBAlleles = AssembleColocatedList(ReaderB, currentAllele, mFirst,
                                                                          ref backLogExistPoolB, ref backLogPoolBAlleles);
                        } //else, if there is nothing in either backlog, the colocated-variant list should stay empty.

                        //2) Now we have finished reading out all the co-located variants...
                        //We need to organize them into pairs, to know which allele to compare with which.
                        var             Pairs                      = SelectPairs(coLocatedPoolAAlleles, coLocatedPoolBAlleles);
                        var             ConsensusVariants          = new List <CalledAllele>();
                        AggregateAllele lastConsensusReferenceCall = null;

                        //3) For each pair, combine them and mark if biased or not.
                        for (int PairIndex = 0; PairIndex < Pairs.Count; PairIndex++)
                        {
                            var VariantA = Pairs[PairIndex][0];
                            var VariantB = Pairs[PairIndex][1];

                            var ComparisonCase = GetComparisonCase(VariantA, VariantB);

                            //add VarA and VarB to appropriate venn diagram files.
                            WriteVarsToVennFiles(ComparisonCase, VariantA, VariantB);
                            AggregateAllele Consensus = null;

                            if (doConsensus)
                            {
                                Consensus = consensusBuilder.CombineVariants(
                                    VariantA, VariantB, ComparisonCase);

                                //It is possible, for multiallelic sites, that a pair of variants
                                //ends up as a consensus reference, and we may already have
                                //called a reference for this locus.
                                //We might have some cleaning up to do...
                                if (Consensus.Genotype == Pisces.Domain.Types.Genotype.HomozygousRef)
                                {
                                    //this is the first time we see a reference at this locus
                                    if (lastConsensusReferenceCall == null)
                                    {
                                        lastConsensusReferenceCall = Consensus;
                                        //it is OK to fall through and add our Consensus variant to the list.
                                    }

                                    //Else, if we have already called a reference variant for this locus,
                                    //we want to merge the results from this reference with the old one
                                    //*before* we write it to file.
                                    else
                                    {
                                        //the chr, pos, ref, alt, and depth should be correct.
                                        //We'll merge the filters,
                                        //and take the max SB and PB. (where a higher value indicates worse value, so we stay conservative)
                                        lastConsensusReferenceCall.Filters = ConsensusBuilder.CombineFilters(lastConsensusReferenceCall, Consensus);

                                        lastConsensusReferenceCall.StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                                        {
                                            GATKBiasScore = Math.Max(lastConsensusReferenceCall.StrandBiasResults.GATKBiasScore, Consensus.StrandBiasResults.GATKBiasScore)
                                        };

                                        lastConsensusReferenceCall.PoolBiasResults = new Pisces.Domain.Models.BiasResults()
                                        {
                                            GATKBiasScore = Math.Max(lastConsensusReferenceCall.PoolBiasResults.GATKBiasScore, Consensus.PoolBiasResults.GATKBiasScore)
                                        };

                                        //we are going to take the min Q and NL score, to be conservative
                                        lastConsensusReferenceCall.NoiseLevelApplied = Math.Min(lastConsensusReferenceCall.NoiseLevelApplied, Consensus.NoiseLevelApplied);
                                        lastConsensusReferenceCall.GenotypeQscore    = Math.Min(lastConsensusReferenceCall.GenotypeQscore, Consensus.GenotypeQscore);
                                        //the variant Q score is merged with the other call's *variant* Q score
                                        //(it was previously compared against the genotype Q score by mistake).
                                        lastConsensusReferenceCall.VariantQscore     = Math.Min(lastConsensusReferenceCall.VariantQscore, Consensus.VariantQscore);

                                        //the merged reference is already in the list; do not add a second one.
                                        continue;
                                    }
                                }

                                ConsensusVariants.Add(Consensus);
                            }
                        }

                        //4) Write out the results to file. (this will be a list of co-located variants)
                        if (doConsensus)
                        {
                            consensusBuilder.WriteConsensusVariantsToFile(ConsensusVariants);
                        }

                        //we assembled everyone and no one is left.
                        if ((backLogPoolAAlleles == null) &&
                            (backLogPoolBAlleles == null))
                        {
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        OnError(string.Format("Fatal error encountered comparing paired sample vcfs; Check {0}, position {1}.  Exception: {2}",
                                              currentAllele.Chromosome, currentAllele.ReferencePosition, ex));
                        throw;
                    }
                } //close assemble list
            } //close usings

            if (doConsensus)
            {
                consensusBuilder.CloseConsensusFile();
            }

            CloseVennDiagramStreams();
        }