// The idea is to keep track of the disparity between two pools as a measure of
// FFPE degradation, or of overall oxidation affecting the tissue sample.
//
// Possible SNP changes:
//
//   *   A   C   G   T
//   A   *   1   2   3
//   C   4   *   5   6
//   G   7   8   *   9
//   T   10  11  12  *
//
/// <summary>
/// Reads every variant in <c>options.VcfPath</c>, feeds each one to the basic counter and to the
/// edge-issue counter, and writes whichever counts files the options enable.
/// </summary>
/// <param name="options">
/// Supplies the vcf path, the output directory, the edge-region extent, an optional loci count
/// override, and the flags selecting which counts files are written.
/// </param>
/// <returns>The paths of the basic counts, edge counts and edge variants files.</returns>
public static SignatureSorterResultFiles StrainVcf(VQROptions options)
{
    var basicCounts = new CountData();
    var edgeCounts = new EdgeIssueCountData(options.ExtentofEdgeRegion);

    string basicCountsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".counts");
    string edgeCountsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgecounts");
    string edgeVariantsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgevariants");

    using (var reader = new AlleleReader(options.VcfPath))
    {
        List<CalledAllele> batch;
        while (reader.GetNextVariants(out batch))
        {
            foreach (CalledAllele allele in batch)
            {
                try
                {
                    basicCounts.Add(allele);
                    edgeCounts.Add(allele, edgeVariantsPath);
                }
                catch (Exception ex)
                {
                    // Log where the failure happened, then let the original exception propagate.
                    string message = string.Format("Fatal error processing vcf; Check {0}, position {1}. Exception: {2}", allele.Chromosome, allele.ReferencePosition, ex);
                    Logger.WriteToLog(message);
                    throw;
                }
            }
        }

        // The edge issue filter trails N variants behind the reader, so push N nulls through it
        // to process whatever is still sitting in its buffer.
        for (int remaining = options.ExtentofEdgeRegion; remaining > 0; remaining--)
        {
            edgeCounts.Add(null, edgeVariantsPath);
        }

        bool overrideLociCount = options.LociCount > 0;
        if (overrideLociCount)
        {
            basicCounts.ForceTotalPossibleMutations(options.LociCount);
            edgeCounts.ForceTotalPossibleMutations(options.LociCount);
        }

        if (options.DoBasicChecks)
        {
            CountsFileWriter.WriteCountsFile(basicCountsPath, basicCounts);
        }

        if (options.DoAmpliconPositionChecks)
        {
            CountsFileWriter.WriteCountsFile(edgeCountsPath, edgeCounts);
        }
    }

    return new SignatureSorterResultFiles(basicCountsPath, edgeCountsPath, edgeVariantsPath);
}
/// <summary>
/// Asserts that the first batch of variants read from each file has the same size and that the
/// variants agree, position by position, on genotype and alternate allele. Q scores are not compared.
/// </summary>
/// <param name="file1">Path of the first vcf.</param>
/// <param name="file2">Path of the second vcf.</param>
public static void AssertSameVariants_QScoreAgnostic(string file1, string file2)
{
    List<CalledAllele> firstFileVariants;
    List<CalledAllele> secondFileVariants;

    using (var reader1 = new AlleleReader(file1))
    {
        // NOTE(review): only a single GetNextVariants call is made per file, so only the first
        // batch of each file is compared - confirm whether the whole file should be walked.
        reader1.GetNextVariants(out firstFileVariants);

        using (var reader2 = new AlleleReader(file2))
        {
            reader2.GetNextVariants(out secondFileVariants);

            Assert.Equal(firstFileVariants.Count, secondFileVariants.Count);

            int index = 0;
            while (index < firstFileVariants.Count)
            {
                CalledAllele left = firstFileVariants[index];
                CalledAllele right = secondFileVariants[index];

                Assert.Equal(left.Genotype, right.Genotype);
                Assert.Equal(left.AlternateAllele, right.AlternateAllele);

                index++;
            }
        }
    }
}
/// <summary>
/// Advances the reader by one batch and reports whether the first allele of that batch
/// has the given category.
/// </summary>
/// <param name="vr">Reader to advance.</param>
/// <param name="type">Category the first allele is expected to have.</param>
/// <returns>True when the first allele's <c>Type</c> equals <paramref name="type"/>.</returns>
private bool TestVariant(AlleleReader vr, AlleleCategory type)
{
    // The reader assigns the list through the out argument, so no placeholder is needed.
    List<CalledAllele> batch;
    vr.GetNextVariants(out batch);

    CalledAllele head = batch[0];
    return head.Type == type;
}
/// <summary>
/// Steps forward with the reader, collecting every variant located at the position of
/// <paramref name="CurrentVariant"/>. A pending backlog batch is consumed before the reader is touched.
/// </summary>
/// <param name="Reader">Source of further variant batches.</param>
/// <param name="CurrentVariant">Allele whose position is being assembled.</param>
/// <param name="alleleOrdering">Comparer used to order the current allele against each batch's first allele.</param>
/// <param name="BackLogExists">
/// In: whether <paramref name="TheBackLog"/> holds an unprocessed batch.
/// Out: whether a batch lying after the current position was parked in the backlog.
/// </param>
/// <param name="TheBackLog">The parked batch; set to null on exit when no backlog exists.</param>
/// <returns>All variants found at the current position (possibly empty).</returns>
/// <exception cref="InvalidDataException">
/// The comparer returned something other than 0 or -1, i.e. the batch sorts before the current position.
/// </exception>
private static List <CalledAllele> AssembleColocatedList( AlleleReader Reader, CalledAllele CurrentVariant, AlleleCompareByLoci alleleOrdering, ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
{
    var colocated = new List<CalledAllele>();

    while (true)
    {
        List<CalledAllele> candidates;

        if (BackLogExists)
        {
            // Use up the parked batch first.
            candidates = TheBackLog;
            BackLogExists = false;
        }
        else if (!Reader.GetNextVariants(out candidates))
        {
            // Reader is exhausted.
            break;
        }

        // -1 if the current allele comes first, 0 if co-located.
        int order = alleleOrdering.OrderAlleles(CurrentVariant, candidates.First());

        if (order == 0)
        {
            // The batch we just got is at our current position.
            colocated.AddRange(candidates);
            continue;
        }

        if (order != -1)
        {
            throw new InvalidDataException("Vcf needs to be ordered.");
        }

        // The batch we just got is after our current position: park it and stop reading.
        TheBackLog = candidates;
        BackLogExists = true;
        break;
    }

    if (!BackLogExists)
    {
        TheBackLog = null;
    }

    return colocated;
}
/// <summary>
/// Counts the mutations in a vcf and writes them to a ".counts" file in <paramref name="outDir"/>.
/// An existing counts file is first preserved as ".counts.original" (replacing any older backup).
/// </summary>
/// <param name="vcfIn">Path of the vcf to read.</param>
/// <param name="outDir">Directory that receives the counts file.</param>
/// <param name="lociCount">When greater than zero, forces the counter's total possible mutations to this value.</param>
/// <returns>The path of the counts file.</returns>
/// <remarks>
/// Any exception raised while adding a variant is logged with the variant's location and rethrown.
/// The counter's writer is closed even when processing fails, so the output file handle is not leaked.
/// </remarks>
public static string WriteCountsFile(string vcfIn, string outDir, int lociCount)
{
    string vcfName = Path.GetFileName(vcfIn);
    string countsPath = Path.Combine(outDir, vcfName.Replace(".vcf", ".counts"));
    string countsPathOld = Path.Combine(outDir, vcfName.Replace(".vcf", ".counts.original"));

    if (File.Exists(countsPath))
    {
        // Keep the previous result as a backup, overwriting any older backup.
        File.Copy(countsPath, countsPathOld, true);
        File.Delete(countsPath);
    }

    var counter = new MutationCounter();

    using (AlleleReader readerA = new AlleleReader(vcfIn))
    {
        counter.StartWriter(countsPath);

        try
        {
            List<CalledAllele> variants;
            while (readerA.GetNextVariants(out variants))
            {
                foreach (var variant in variants)
                {
                    try
                    {
                        counter.Add(variant);
                    }
                    catch (Exception ex)
                    {
                        Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}. Exception: {2}", variant.Chromosome, variant.ReferencePosition, ex));
                        throw;
                    }
                }
            }

            if (lociCount > 0)
            {
                counter.ForceTotalPossibleMutations(lociCount);
            }
        }
        finally
        {
            // Previously skipped whenever processing threw, which left the writer open.
            counter.CloseWriter();
        }
    }

    return countsPath;
}
/// <summary>
/// Reads the next line from an uncrushed vcf, which must carry exactly one variant.
/// </summary>
/// <param name="reader">Reader to advance.</param>
/// <param name="variant">The variant that was read, or null when the reader produced nothing.</param>
/// <returns>True when a variant was read; false when the reader had nothing further.</returns>
/// <exception cref="ArgumentException">The line held more than one variant.</exception>
public static bool GetNextUncrushedAllele(AlleleReader reader, out CalledAllele variant)
{
    variant = null;

    List<CalledAllele> batch;
    if (!reader.GetNextVariants(out batch))
    {
        return false;
    }

    if (batch.Count > 1)
    {
        throw new ArgumentException("Input file should not have crushed variants. There should only be one variant per line");
    }

    variant = batch[0];
    return true;
}
/// <summary>
/// Test setup: fills <c>RecalCollection</c> with loci from "VariantDepthReaderTest.vcf".
/// Only the first allele of each batch is considered, and it is skipped when its
/// reference position equals that of the previously added allele.
/// </summary>
public RecalibratedVariantsTests()
{
    RecalCollection = new RecalibratedVariantsCollection();

    string vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VariantDepthReaderTest.vcf");

    using (var reader = new AlleleReader(vcfPath))
    {
        // Starts as a blank allele so the first comparison runs against its default position.
        CalledAllele previous = new CalledAllele();
        List<CalledAllele> batch;

        while (reader.GetNextVariants(out batch))
        {
            CalledAllele head = batch[0];

            if (previous.ReferencePosition != head.ReferencePosition)
            {
                RecalCollection.AddLocus(head);
                previous = head;
            }
        }
    }
}
/// <summary>
/// Reads the first three records of <c>VcfTestFile_1</c> and checks the raw line text, the parsed
/// chromosome / reference / alternate fields, and the reader position after the first record.
/// </summary>
public void GetNextVariantTests()
{
    // The reader is disposable, so release it when the test finishes.
    using (var vr = new AlleleReader(VcfTestFile_1))
    {
        List<CalledAllele> resultVariants;
        string resultString;

        // First record.
        vr.GetNextVariants(out resultVariants, out resultString);
        Assert.Equal(@"chr1 10 . A . 25 PASS DP=500 GT:GQ:AD:VF:NL:SB:NC 1/1:25:0,0:0.0000:23:0.0000:0.0010", resultString.TrimEnd('\r'));
        Assert.Equal("chr1", resultVariants[0].Chromosome);
        Assert.Equal("A", resultVariants[0].ReferenceAllele);
        Assert.Equal(".", resultVariants[0].AlternateAllele);

        // Note, we have seen the position assert below fail for specific user configurations.
        // When it fails the error message is:
        //   Assert.Equal() Failure
        //   Expected: 1428
        //   Actual: 1452
        // If this happens to you, check your git attributes config file. You might be handling vcf
        // text file line endings differently, so the white space counts differently in this test.
        // In that case, the failure is purely cosmetic.
        //
        // Try: auto detect text files and perform LF normalization
        // # http://davidlaing.com/2012/09/19/customise-your-gitattributes-to-become-a-git-ninja/
        //   *text = auto
        //   *.cs diff = csharp
        //   *.bam binary
        //   *.vcf text
        //   .fa text eol = crlf
        var position = vr.Position();
        if (position == 1428)
        {
            Console.WriteLine("This isn't critical, but you might want to change your line endings convention. ");
            Console.WriteLine("This project was developed with \\CR\\LF , not \\LF convention.");
        }
        else
        {
            Assert.Equal(1452, position);
        }

        // Second record.
        vr.GetNextVariants(out resultVariants, out resultString);
        Assert.Equal(@"chr1 20 . 
A T 25 PASS DP=500 GT:GQ:AD:VF:NL:SB:NC 1/1:25:0,0:0.0000:23:0.0000:0.0010", resultString.TrimEnd('\r'));
        Assert.Equal("chr1", resultVariants[0].Chromosome);

        // Third record (read without requesting the raw line).
        vr.GetNextVariants(out resultVariants);
        Assert.Equal("chr1", resultVariants[0].Chromosome);
        Assert.Equal("A", resultVariants[0].ReferenceAllele);
        Assert.Equal("AT", resultVariants[0].AlternateAllele);
    }
}
/// <summary>
/// Performs a Venn split between the two samples at <c>_inputPaths[0]</c> (pool A) and
/// <c>_inputPaths[1]</c> (pool B). Both files are walked in parallel, position by position;
/// co-located alleles are paired, each pair is written to the Venn output files, and, when a
/// consensus builder is configured, a consensus call is produced for each pair and written out.
/// </summary>
/// <remarks>
/// Any exception raised while processing a position is reported through <c>OnError</c> together
/// with the position being processed, and is then rethrown.
/// </remarks>
public void DoPairwiseVenn()
{
    bool doConsensus = (consensusBuilder != null);
    bool requireGenotypes = false;

    List<CalledAllele> backLogPoolAAlleles;
    List<CalledAllele> backLogPoolBAlleles;

    using (AlleleReader ReaderA = new AlleleReader(_inputPaths[0], requireGenotypes))
    using (AlleleReader ReaderB = new AlleleReader(_inputPaths[1], requireGenotypes))
    {
        var vcfA_HeaderLines = ReaderA.HeaderLines;
        var chrOrderingFromInput = ChrCompare.GetChrListFromVcfHeader(vcfA_HeaderLines);
        var alleleCompareByLoci = new AlleleCompareByLoci(chrOrderingFromInput);

        if (doConsensus)
        {
            consensusBuilder.OpenConsensusFile(vcfA_HeaderLines);
        }

        OpenVennDiagramStreams(ReaderA.HeaderLines);

        // Read the first variant(s) from each gvcf file; they become the initial backlogs.
        var currentAllele = new CalledAllele();
        var backLogExistPoolA = ReaderA.GetNextVariants(out backLogPoolAAlleles);
        var backLogExistPoolB = ReaderB.GetNextVariants(out backLogPoolBAlleles);

        // Keep reading and processing until we are done with both gvcfs.
        while (true)
        {
            try
            {
                // 1) Get the next set of variants. Pull from the backlog first,
                //    choosing all the variants at the first available position.
                var coLocatedPoolAAlleles = new List<CalledAllele>();
                var coLocatedPoolBAlleles = new List<CalledAllele>();

                if (backLogExistPoolA || backLogExistPoolB)
                {
                    // Choose which backlog holds the next position to look at: the earlier of
                    // the two when both exist (ties go to pool B), otherwise whichever exists.
                    List<CalledAllele> leadingBackLog;
                    if (backLogExistPoolA && backLogExistPoolB)
                    {
                        int orderResult = alleleCompareByLoci.OrderAlleles(
                            backLogPoolAAlleles.First(), backLogPoolBAlleles.First());

                        leadingBackLog = (orderResult < 0) ? backLogPoolAAlleles : backLogPoolBAlleles;
                    }
                    else
                    {
                        leadingBackLog = backLogExistPoolB ? backLogPoolBAlleles : backLogPoolAAlleles;
                    }

                    CalledAllele leadingAllele = leadingBackLog.First();
                    currentAllele.Chromosome = leadingAllele.Chromosome;
                    currentAllele.ReferencePosition = leadingAllele.ReferencePosition;

                    // Assemble lists of co-located variants at the position of the current variant.
                    coLocatedPoolAAlleles = AssembleColocatedList(ReaderA, currentAllele, alleleCompareByLoci,
                        ref backLogExistPoolA, ref backLogPoolAAlleles);

                    coLocatedPoolBAlleles = AssembleColocatedList(ReaderB, currentAllele, alleleCompareByLoci,
                        ref backLogExistPoolB, ref backLogPoolBAlleles);
                }
                // Else, if there is nothing in either backlog, the co-located lists stay empty.

                // 2) Now we have finished reading out all the co-located variants.
                //    Organize them into pairs, to know which allele to compare with which.
                var pairs = SelectPairs(coLocatedPoolAAlleles, coLocatedPoolBAlleles);
                var consensusVariants = new List<CalledAllele>();
                AggregateAllele lastConsensusReferenceCall = null;

                // 3) For each pair, combine them and mark if biased or not.
                for (int pairIndex = 0; pairIndex < pairs.Count; pairIndex++)
                {
                    var variantA = pairs[pairIndex][0];
                    var variantB = pairs[pairIndex][1];
                    var comparisonCase = GetComparisonCase(variantA, variantB);

                    // Add VarA and VarB to the appropriate venn diagram files.
                    WriteVarsToVennFiles(comparisonCase, variantA, variantB);

                    if (!doConsensus)
                    {
                        continue;
                    }

                    AggregateAllele consensus = consensusBuilder.CombineVariants(
                        variantA, variantB, comparisonCase);

                    // For multiallelic sites, a pair of variants could end up as a consensus
                    // reference, and we may already have called a reference for this locus.
                    if (consensus.Genotype == Pisces.Domain.Types.Genotype.HomozygousRef)
                    {
                        if (lastConsensusReferenceCall != null)
                        {
                            // A reference was already called at this locus: fold this one into
                            // it *before* anything is written to file, and do not add a second entry.
                            MergeDuplicateReferenceCall(lastConsensusReferenceCall, consensus);
                            continue;
                        }

                        // First reference seen at this locus; fall through and add it to the list.
                        lastConsensusReferenceCall = consensus;
                    }

                    consensusVariants.Add(consensus);
                }

                // 4) Write out the results to file. (This will be a list of co-located variants.)
                if (doConsensus)
                {
                    consensusBuilder.WriteConsensusVariantsToFile(consensusVariants);
                }

                // We assembled everyone and no one is left. The flags are checked rather than the
                // lists being null: AssembleColocatedList nulls a list exactly when its flag is
                // false, but if both inputs are empty it is never called and the lists handed
                // back by the reader may be non-null, which would otherwise loop forever.
                if (!backLogExistPoolA && !backLogExistPoolB)
                {
                    break;
                }
            }
            catch (Exception ex)
            {
                OnError(string.Format("Fatal error encountered comparing paired sample vcfs; Check {0}, position {1}. Exception: {2}",
                    currentAllele.Chromosome, currentAllele.ReferencePosition, ex));
                throw;
            }
        } // close assemble list
    } // close usings

    if (doConsensus)
    {
        consensusBuilder.CloseConsensusFile();
    }

    CloseVennDiagramStreams();
}

/// <summary>
/// Folds a second consensus reference call at the same locus into the one already recorded.
/// The chr, pos, ref, alt and depth of the recorded call are left as they are.
/// </summary>
/// <param name="recorded">The reference call already kept for this locus; updated in place.</param>
/// <param name="duplicate">The additional reference call to merge in.</param>
private static void MergeDuplicateReferenceCall(AggregateAllele recorded, AggregateAllele duplicate)
{
    // Merge the filters, and take the max SB and PB
    // (a higher value indicates a worse value, so we stay conservative).
    recorded.Filters = ConsensusBuilder.CombineFilters(recorded, duplicate);

    recorded.StrandBiasResults = new Pisces.Domain.Models.BiasResults()
    {
        GATKBiasScore = Math.Max(recorded.StrandBiasResults.GATKBiasScore, duplicate.StrandBiasResults.GATKBiasScore)
    };

    recorded.PoolBiasResults = new Pisces.Domain.Models.BiasResults()
    {
        GATKBiasScore = Math.Max(recorded.PoolBiasResults.GATKBiasScore, duplicate.PoolBiasResults.GATKBiasScore)
    };

    // Take the min Q and NL scores, to be conservative.
    recorded.NoiseLevelApplied = Math.Min(recorded.NoiseLevelApplied, duplicate.NoiseLevelApplied);
    recorded.GenotypeQscore = Math.Min(recorded.GenotypeQscore, duplicate.GenotypeQscore);

    // Compare variant Q against variant Q (this previously used the duplicate's genotype Q).
    recorded.VariantQscore = Math.Min(recorded.VariantQscore, duplicate.VariantQscore);
}