/// <summary>
/// Collects every allele that sits at the same locus as <paramref name="CurrentVariant"/>,
/// consuming the backlog first (if one exists) and then reading forward from the vcf.
/// Reading stops at end-of-file or at the first allele that sorts after the current locus;
/// that allele list is handed back to the caller through the backlog.
/// </summary>
/// <param name="Reader">Vcf reader to pull further variants from once the backlog is used up.</param>
/// <param name="CurrentVariant">Allele whose locus defines "co-located".</param>
/// <param name="mFirst">Ordering flag passed unchanged to <c>AlleleCompareByLoci.OrderAlleles</c>.</param>
/// <param name="BackLogExists">
/// In: whether <paramref name="TheBackLog"/> holds unprocessed alleles.
/// Out: whether a later-locus allele list was pushed back onto the backlog.
/// </param>
/// <param name="TheBackLog">
/// In: alleles read previously but not yet processed.
/// Out: the alleles that sort after the current locus, or null when nothing is left over.
/// </param>
/// <returns>The alleles found at the current locus (possibly empty).</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the comparison yields anything other than 0 or -1, i.e. the next allele
/// is not at or after the current locus.
/// </exception>
private static List<CalledAllele> AssembleColocatedList(
    VcfReader Reader,
    CalledAllele CurrentVariant,
    bool mFirst,
    ref bool BackLogExists,
    ref List<CalledAllele> TheBackLog)
{
    var coLocated = new List<CalledAllele>();

    for (;;)
    {
        List<CalledAllele> candidates;

        if (BackLogExists)
        {
            // Use up the pending backlog before touching the reader again.
            candidates = TheBackLog;
            BackLogExists = false;
        }
        else
        {
            var rawVariant = new VcfVariant();
            if (!Reader.GetNextVariant(rawVariant))
            {
                // Reader is exhausted; nothing more to collect.
                break;
            }

            candidates = VcfVariantUtilities.Convert(new List<VcfVariant> { rawVariant }).ToList();
        }

        // -1 when the current variant comes first, 0 when the two are co-located.
        int order = AlleleCompareByLoci.OrderAlleles(CurrentVariant, candidates.First(), mFirst);

        if (order == 0)
        {
            // Same locus as the current variant: keep it and look at the next one.
            coLocated.AddRange(candidates);
            continue;
        }

        if (order != -1)
        {
            throw new InvalidDataException("Vcf needs to be ordered.");
        }

        // The candidate lies beyond the current locus: park it for the next call and stop.
        TheBackLog = candidates;
        BackLogExists = true;
        break;
    }

    if (!BackLogExists)
    {
        TheBackLog = null;
    }

    return coLocated;
}
/// <summary>
/// Checks the sign returned by <c>AlleleCompareByLoci.OrderAlleles</c> for pairs of dummy
/// alleles on numeric, chrX, chrM and nonstandard chromosomes, with the boolean
/// "prioritize chrM" option both on and off.
/// </summary>
public void OrderVariants()
{
    // Expected comparison results, named for readability.
    const int FirstSortsEarlier = -1;
    const int SameLocus = 0;
    const int FirstSortsLater = 1;

    var onChr10 = TestHelper.CreateDummyAllele("chr10", 123, "A", "C", 1000, 156);
    var onChrX = TestHelper.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156);
    var onChrXNextPos = TestHelper.CreateDummyAllele("chrX", 124, "A", "C", 1000, 156);
    var onChrM = TestHelper.CreateDummyAllele("chrM", 123, "A", "C", 1000, 156);
    var onChrMNextPos = TestHelper.CreateDummyAllele("chrM", 124, "A", "C", 1000, 156);
    var onChr9 = TestHelper.CreateDummyAllele("chr9", 123, "A", "C", 1000, 156);
    var onChr9NextPos = TestHelper.CreateDummyAllele("chr9", 124, "A", "C", 1000, 156);
    var onChrZ = TestHelper.CreateDummyAllele("chrZ", 123, "A", "C", 1000, 156);
    var onChrA = TestHelper.CreateDummyAllele("chrA", 123, "A", "C", 1000, 156);

    // ---------------------------------------------------------------------------
    // Neither or both alleles on chrM: the chrM-priority option must not matter.
    // ---------------------------------------------------------------------------

    // Numeric chromosome, adjacent positions.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChr9NextPos, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChr9NextPos, false));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChr9NextPos, onChr9, true));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChr9NextPos, onChr9, false));

    // chrX, adjacent positions.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrXNextPos, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrXNextPos, false));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrXNextPos, onChrX, true));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrXNextPos, onChrX, false));

    // chrM, adjacent positions.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrM, onChrMNextPos, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrM, onChrMNextPos, false));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrMNextPos, onChrM, true));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrMNextPos, onChrM, false));

    // chr9 vs chr10: a plain string comparison would get this pair wrong.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChr10, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChr10, false));

    // Numeric vs chrX.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrX, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrX, false));

    // An allele compared with itself.
    Assert.Equal(SameLocus, AlleleCompareByLoci.OrderAlleles(onChr9, onChr9, true));
    Assert.Equal(SameLocus, AlleleCompareByLoci.OrderAlleles(onChrX, onChrX, true));
    Assert.Equal(SameLocus, AlleleCompareByLoci.OrderAlleles(onChrM, onChrM, true));

    // ---------------------------------------------------------------------------
    // Exactly one allele on chrM: the chrM-priority option flips the result.
    // ---------------------------------------------------------------------------

    // Numeric vs chrM.
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChr9, onChrM, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrM, false));

    // chrX vs chrM.
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrX, onChrM, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrM, false));

    // ---------------------------------------------------------------------------
    // Nonstandard chroms should be below numerics and then ordered alphabetically
    // ---------------------------------------------------------------------------

    // Numeric vs chrZ.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrZ, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrZ, false));

    // chrX vs chrZ.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrZ, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrZ, false));

    // Originally labelled "One chrM, one weird".
    // NOTE(review): these two calls pass the chrX allele, not the chrM one, so they repeat
    // the chrX-vs-chrZ checks above and chrM-vs-chrZ is not exercised. Looks like a
    // copy-paste slip; confirm the intended expectations for chrM before changing.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrZ, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChrX, onChrZ, false));

    // Numeric vs chrA.
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrA, true));
    Assert.Equal(FirstSortsEarlier, AlleleCompareByLoci.OrderAlleles(onChr9, onChrA, false));

    // chrX vs chrA.
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrX, onChrA, true));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrX, onChrA, false));

    // Originally labelled "One chrM, one funny".
    // NOTE(review): same situation as above - chrX is passed, so chrM-vs-chrA is not
    // exercised. Confirm the intended expectations for chrM before changing.
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrX, onChrA, true));
    Assert.Equal(FirstSortsLater, AlleleCompareByLoci.OrderAlleles(onChrX, onChrA, false));
}
/// <summary>
/// Performs a Venn split between the two vcfs at <c>_inputPaths[0]</c> and <c>_inputPaths[1]</c>.
/// Both files are walked in locus order; at each locus the co-located alleles from the two
/// files are paired, each pair is written to the Venn output streams, and - when a
/// consensus builder is configured - a consensus call per pair is written to the consensus file.
/// </summary>
/// <param name="mFirst">
/// Ordering flag passed unchanged to <c>AlleleCompareByLoci.OrderAlleles</c> and
/// <c>AssembleColocatedList</c> whenever two loci are compared.
/// </param>
/// <remarks>
/// Any exception raised while processing a locus is reported through <c>OnError</c>
/// (with the chromosome and position being processed) and then rethrown.
/// </remarks>
public void DoPairwiseVenn(bool mFirst)
{
    bool doConsensus = (consensusBuilder != null);
    bool requireGenotypes = false;

    using (VcfReader ReaderA = new VcfReader(_inputPaths[0], requireGenotypes))
    using (VcfReader ReaderB = new VcfReader(_inputPaths[1], requireGenotypes))
    {
        if (doConsensus)
        {
            consensusBuilder.OpenConsensusFile(ReaderA.HeaderLines);
        }

        OpenVennDiagramStreams(ReaderA.HeaderLines);

        // Read the first variant from each vcf; these seed the two backlogs.
        var currentAllele = new CalledAllele();
        var backLogPoolAVcfVariant = new VcfVariant();
        var backLogPoolBVcfVariant = new VcfVariant();

        var backLogExistPoolA = ReaderA.GetNextVariant(backLogPoolAVcfVariant);
        var backLogExistPoolB = ReaderB.GetNextVariant(backLogPoolBVcfVariant);

        var backLogPoolAAlleles = backLogExistPoolA
            ? VcfVariantUtilities.Convert(new List<VcfVariant> { backLogPoolAVcfVariant }).ToList()
            : null;
        var backLogPoolBAlleles = backLogExistPoolB
            ? VcfVariantUtilities.Convert(new List<VcfVariant> { backLogPoolBVcfVariant }).ToList()
            : null;

        // Keep reading and processing until both vcfs are exhausted.
        while (true)
        {
            try
            {
                // 1) Get the next set of variants. Pull from the backlog first,
                //    choosing all the variants at the first available position.
                var coLocatedPoolAAlleles = new List<CalledAllele>();
                var coLocatedPoolBAlleles = new List<CalledAllele>();

                if (backLogExistPoolA || backLogExistPoolB)
                {
                    // Pick the locus to process: the earlier of the two backlog heads,
                    // or the only one available. On a tie, pool B's head is used
                    // (both heads then share the same locus).
                    CalledAllele leadAllele;
                    if (backLogExistPoolA && backLogExistPoolB)
                    {
                        int orderResult = AlleleCompareByLoci.OrderAlleles(
                            backLogPoolAAlleles.First(), backLogPoolBAlleles.First(), mFirst);

                        leadAllele = (orderResult < 0)
                            ? backLogPoolAAlleles.First()
                            : backLogPoolBAlleles.First();
                    }
                    else if (backLogExistPoolB)
                    {
                        leadAllele = backLogPoolBAlleles.First();
                    }
                    else
                    {
                        leadAllele = backLogPoolAAlleles.First();
                    }

                    currentAllele.Chromosome = leadAllele.Chromosome;
                    currentAllele.ReferencePosition = leadAllele.ReferencePosition;

                    // Assemble lists of co-located variants at the position of the current variant.
                    coLocatedPoolAAlleles = AssembleColocatedList(ReaderA, currentAllele, mFirst,
                        ref backLogExistPoolA, ref backLogPoolAAlleles);

                    coLocatedPoolBAlleles = AssembleColocatedList(ReaderB, currentAllele, mFirst,
                        ref backLogExistPoolB, ref backLogPoolBAlleles);
                }
                // Else: nothing in either backlog, so the co-located lists stay empty.

                // 2) All co-located variants have been read. Organize them into pairs
                //    so we know which allele to compare with which.
                var Pairs = SelectPairs(coLocatedPoolAAlleles, coLocatedPoolBAlleles);

                var ConsensusVariants = new List<CalledAllele>();
                AggregateAllele lastConsensusReferenceCall = null;

                // 3) For each pair, write it to the Venn files and (optionally) build a consensus call.
                for (int PairIndex = 0; PairIndex < Pairs.Count; PairIndex++)
                {
                    var VariantA = Pairs[PairIndex][0];
                    var VariantB = Pairs[PairIndex][1];

                    var ComparisonCase = GetComparisonCase(VariantA, VariantB);

                    // Add VariantA and VariantB to the appropriate Venn diagram files.
                    WriteVarsToVennFiles(ComparisonCase, VariantA, VariantB);

                    if (!doConsensus)
                    {
                        continue;
                    }

                    AggregateAllele Consensus = consensusBuilder.CombineVariants(
                        VariantA, VariantB, ComparisonCase);

                    // At multiallelic sites a pair of variants can end up as a consensus
                    // reference call, and a reference may already have been called for
                    // this locus. In that case the two reference calls are merged.
                    if (Consensus.Genotype == Pisces.Domain.Types.Genotype.HomozygousRef)
                    {
                        if (lastConsensusReferenceCall == null)
                        {
                            // First reference call at this locus: remember it and fall
                            // through so it is added to the output list below.
                            lastConsensusReferenceCall = Consensus;
                        }
                        else
                        {
                            // A reference call already exists (and is already in the output
                            // list), so fold this one into it instead of adding a second entry.
                            // The chr, pos, ref, alt and depth are taken as-is from the first call.

                            // Merge the filters, and take the max strand-bias and pool-bias
                            // scores (a higher value is worse, so this stays conservative).
                            lastConsensusReferenceCall.Filters = ConsensusBuilder.CombineFilters(lastConsensusReferenceCall, Consensus);

                            lastConsensusReferenceCall.StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                            {
                                GATKBiasScore = Math.Max(lastConsensusReferenceCall.StrandBiasResults.GATKBiasScore, Consensus.StrandBiasResults.GATKBiasScore)
                            };
                            lastConsensusReferenceCall.PoolBiasResults = new Pisces.Domain.Models.BiasResults()
                            {
                                GATKBiasScore = Math.Max(lastConsensusReferenceCall.PoolBiasResults.GATKBiasScore, Consensus.PoolBiasResults.GATKBiasScore)
                            };

                            // Take the min noise level and Q scores, to be conservative.
                            lastConsensusReferenceCall.NoiseLevelApplied = Math.Min(lastConsensusReferenceCall.NoiseLevelApplied, Consensus.NoiseLevelApplied);
                            lastConsensusReferenceCall.GenotypeQscore = Math.Min(lastConsensusReferenceCall.GenotypeQscore, Consensus.GenotypeQscore);

                            // Compare variant Q score against variant Q score (the previous
                            // code compared against Consensus.GenotypeQscore by mistake).
                            lastConsensusReferenceCall.VariantQscore = Math.Min(lastConsensusReferenceCall.VariantQscore, Consensus.VariantQscore);
                            continue;
                        }
                    }

                    ConsensusVariants.Add(Consensus);
                }

                // 4) Write out the results to file (a list of co-located variants).
                if (doConsensus)
                {
                    consensusBuilder.WriteConsensusVariantsToFile(ConsensusVariants);
                }

                // Both backlogs are empty: everything has been assembled and nothing is left.
                if ((backLogPoolAAlleles == null) && (backLogPoolBAlleles == null))
                {
                    break;
                }
            }
            catch (Exception ex)
            {
                OnError(string.Format("Fatal error encountered comparing paired sample vcfs; Check {0}, position {1}.  Exception: {2}",
                    currentAllele.Chromosome, currentAllele.ReferencePosition, ex));
                throw;
            }
        } // close assemble loop
    } // close usings

    if (doConsensus)
    {
        consensusBuilder.CloseConsensusFile();
    }

    CloseVennDiagramStreams();
}