Exemplo n.º 1
0
        // The idea is to track the disparity between two pools as a measure of FFPE degradation,
        // or of overall oxidation affecting the tissue sample.


        //possible SNP changes:
        //
        //
        // *    A   C   G   T
        //  A   *   1   2   3
        //  C   4   *   5   6
        //  G   7   8   *   9
        //  T   10  11  12  *
        //

        /// <summary>
        /// Streams every variant of the vcf at <c>options.VcfPath</c> through the basic counter and the
        /// edge-issue counter, then writes the counts files selected by the options.
        /// </summary>
        /// <param name="options">
        /// Supplies the vcf path, the output directory, the edge-region extent, an optional loci count
        /// and the two switches that decide which counts files get written.
        /// </param>
        /// <returns>
        /// The three output paths (basic counts, edge counts, edge variants) as returned by
        /// <c>CleanUpOldFiles</c>; they are reported even when a given file was not requested.
        /// </returns>
        public static SignatureSorterResultFiles StrainVcf(VQROptions options)
        {
            var basicCounter = new CountData();
            var edgeCounter  = new EdgeIssueCountData(options.ExtentofEdgeRegion);

            string countsFile       = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".counts");
            string edgeCountsFile   = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgecounts");
            string edgeVariantsFile = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgevariants");

            using (var vcfReader = new AlleleReader(options.VcfPath))
            {
                // The reader assigns this list on every call.
                List<CalledAllele> alleles;

                while (vcfReader.GetNextVariants(out alleles))
                {
                    foreach (var allele in alleles)
                    {
                        try
                        {
                            basicCounter.Add(allele);
                            edgeCounter.Add(allele, edgeVariantsFile);
                        }
                        catch (Exception ex)
                        {
                            // Record where the vcf went wrong, then let the failure propagate unchanged.
                            string message = string.Format(
                                "Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                allele.Chromosome, allele.ReferencePosition, ex);
                            Logger.WriteToLog(message);
                            throw;
                        }
                    }
                }

                // The edge-issue filter trails N variants behind the reader, so push N nulls
                // through it to process whatever is still waiting in its buffer.
                int flushed = 0;
                while (flushed < options.ExtentofEdgeRegion)
                {
                    edgeCounter.Add(null, edgeVariantsFile);
                    flushed++;
                }

                // A positive loci count overrides the totals on both counters.
                if (options.LociCount > 0)
                {
                    basicCounter.ForceTotalPossibleMutations(options.LociCount);
                    edgeCounter.ForceTotalPossibleMutations(options.LociCount);
                }

                if (options.DoBasicChecks)
                {
                    CountsFileWriter.WriteCountsFile(countsFile, basicCounter);
                }

                if (options.DoAmpliconPositionChecks)
                {
                    CountsFileWriter.WriteCountsFile(edgeCountsFile, edgeCounter);
                }
            }

            return new SignatureSorterResultFiles(countsFile, edgeCountsFile, edgeVariantsFile);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Asserts that two vcf files describe the same variants, ignoring everything except
        /// genotype and alternate allele (in particular, Q scores are not compared).
        /// </summary>
        /// <remarks>
        /// Both files are read to the end, one group of co-located variants at a time. The assertion
        /// fails if one file runs out before the other, if a group differs in size, or if any pair of
        /// variants at the same index differs in <c>Genotype</c> or <c>AlternateAllele</c>.
        /// </remarks>
        /// <param name="file1">Path of the first vcf.</param>
        /// <param name="file2">Path of the second vcf.</param>
        public static void AssertSameVariants_QScoreAgnostic(string file1, string file2)
        {
            using (var reader1 = new AlleleReader(file1))
            using (var reader2 = new AlleleReader(file2))
            {
                while (true)
                {
                    List<CalledAllele> variant1List;
                    List<CalledAllele> variant2List;

                    bool gotVariants1 = reader1.GetNextVariants(out variant1List);
                    bool gotVariants2 = reader2.GetNextVariants(out variant2List);

                    // Both files must run out of variants at the same time.
                    Assert.Equal(gotVariants1, gotVariants2);

                    if (!gotVariants1)
                    {
                        break;
                    }

                    Assert.Equal(variant1List.Count, variant2List.Count);

                    for (int i = 0; i < variant1List.Count; i++)
                    {
                        var variant1 = variant1List[i];
                        var variant2 = variant2List[i];

                        Assert.Equal(variant1.Genotype, variant2.Genotype);
                        Assert.Equal(variant1.AlternateAllele, variant2.AlternateAllele);
                    }
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Advances <paramref name="vr"/> by one read and reports whether the first variant it
        /// returned has the expected category.
        /// </summary>
        /// <param name="vr">Reader positioned just before the variant to check.</param>
        /// <param name="type">Category the first returned variant is expected to have.</param>
        /// <returns>True when the first variant's <c>Type</c> equals <paramref name="type"/>.</returns>
        private bool TestVariant(AlleleReader vr, AlleleCategory type)
        {
            // The reader supplies the list; the read's boolean result is deliberately not inspected.
            List<CalledAllele> alleles;
            vr.GetNextVariants(out alleles);

            var first = alleles[0];
            return first.Type == type;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Steps forward with the reader, collecting every variant located at the position of
        /// <paramref name="CurrentVariant"/>.
        /// </summary>
        /// <param name="Reader">Source of further variants once the backlog has been consumed.</param>
        /// <param name="CurrentVariant">Variant whose position is being assembled.</param>
        /// <param name="alleleOrdering">Comparer used to order the current variant against each candidate group.</param>
        /// <param name="BackLogExists">
        /// On entry: whether <paramref name="TheBackLog"/> holds a group to consider first.
        /// On exit: whether a group beyond the current position was stored back into it.
        /// </param>
        /// <param name="TheBackLog">
        /// The pending group. On exit it holds the first group found after the current position,
        /// or null when no such group was stored.
        /// </param>
        /// <returns>All variants found at the current position (possibly none).</returns>
        /// <exception cref="InvalidDataException">A candidate group orders before the current position.</exception>
        private static List <CalledAllele> AssembleColocatedList(
            AlleleReader Reader, CalledAllele CurrentVariant, AlleleCompareByLoci alleleOrdering,
            ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
        {
            var colocated = new List <CalledAllele>();

            while (true)
            {
                List <CalledAllele> candidates;

                if (BackLogExists)
                {
                    // Consume the pending group before touching the reader.
                    candidates    = TheBackLog;
                    BackLogExists = false;
                }
                else if (!Reader.GetNextVariants(out candidates))
                {
                    // Reader is exhausted.
                    break;
                }

                // -1 when the current variant comes first, 0 when co-located.
                int order = alleleOrdering.OrderAlleles(CurrentVariant, candidates.First());

                if (order == 0)
                {
                    // The group sits at our current position: keep it and look for more.
                    colocated.AddRange(candidates);
                    continue;
                }

                if (order != -1)
                {
                    throw new InvalidDataException("Vcf needs to be ordered.");
                }

                // The group lies after our current position: park it in the backlog and stop.
                TheBackLog    = candidates;
                BackLogExists = true;
                break;
            }

            if (!BackLogExists)
            {
                TheBackLog = null;
            }

            return colocated;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Counts the mutations in a vcf and writes them to a ".counts" file in <paramref name="outDir"/>.
        /// </summary>
        /// <remarks>
        /// The output name is the vcf file name with ".vcf" replaced by ".counts". An existing counts
        /// file is first preserved as ".counts.original" (replacing any previous backup).
        /// </remarks>
        /// <param name="vcfIn">Path of the vcf to read.</param>
        /// <param name="outDir">Directory that receives the counts file.</param>
        /// <param name="lociCount">When positive, forces the counter's total possible mutations to this value.</param>
        /// <returns>The path of the counts file.</returns>
        public static string WriteCountsFile(string vcfIn, string outDir, int lociCount)
        {
            var countsPath    = Path.Combine(outDir, Path.GetFileName(vcfIn).Replace(".vcf", ".counts"));
            var countsPathOld = Path.Combine(outDir, Path.GetFileName(vcfIn).Replace(".vcf", ".counts.original"));

            // Keep the previous result as a backup before writing a new one.
            if (File.Exists(countsPath))
            {
                if (File.Exists(countsPathOld))
                {
                    File.Delete(countsPathOld);
                }
                File.Copy(countsPath, countsPathOld);
                File.Delete(countsPath);
            }

            var counter = new MutationCounter();

            using (AlleleReader readerA = new AlleleReader(vcfIn))
            {
                counter.StartWriter(countsPath);

                // Once the writer has been started it must always be closed again, including when a
                // variant fails and the exception is rethrown below; otherwise the writer stays open.
                try
                {
                    List <CalledAllele> variants;

                    while (readerA.GetNextVariants(out variants))
                    {
                        foreach (var variant in variants)
                        {
                            try
                            {
                                counter.Add(variant);
                            }
                            catch (Exception ex)
                            {
                                Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                                                variant.Chromosome, variant.ReferencePosition, ex));
                                throw;
                            }
                        }
                    }

                    if (lociCount > 0)
                    {
                        counter.ForceTotalPossibleMutations(lociCount);
                    }
                }
                finally
                {
                    counter.CloseWriter();
                }
            }

            return countsPath;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Reads the next variant from an uncrushed vcf, i.e. one that carries a single variant per line.
        /// </summary>
        /// <param name="reader">Reader to advance.</param>
        /// <param name="variant">The variant that was read, or null when the reader returned nothing.</param>
        /// <returns>True when a variant was read; false otherwise.</returns>
        /// <exception cref="ArgumentException">The reader returned more than one variant for the same read.</exception>
        public static bool GetNextUncrushedAllele(AlleleReader reader, out CalledAllele variant)
        {
            List <CalledAllele> alleles;
            bool gotVariants = reader.GetNextVariants(out alleles);

            if (!gotVariants)
            {
                variant = null;
                return false;
            }

            if (alleles.Count > 1)
            {
                throw new ArgumentException("Input file should not have crushed variants. There should only be one variant per line");
            }

            variant = alleles[0];
            return true;
        }
        /// <summary>
        /// Test fixture setup: fills <c>RecalCollection</c> with one locus per distinct consecutive
        /// reference position found in "VariantDepthReaderTest.vcf".
        /// </summary>
        public RecalibratedVariantsTests()
        {
            RecalCollection = new RecalibratedVariantsCollection();
            string testVcf = Path.Combine(TestPaths.LocalTestDataDirectory, "VariantDepthReaderTest.vcf");

            using (var reader = new AlleleReader(testVcf))
            {
                List <CalledAllele> alleles;
                var previous = new CalledAllele();

                while (reader.GetNextVariants(out alleles))
                {
                    // Only the first variant of each group represents the locus.
                    var current = alleles[0];

                    // Skip a group that repeats the position that was just added.
                    if (previous.ReferencePosition != current.ReferencePosition)
                    {
                        RecalCollection.AddLocus(current);
                        previous = current;
                    }
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reads the first three records of <c>VcfTestFile_1</c> and checks the raw line text, the
        /// parsed chromosome / reference / alternate alleles, and the reader position after the
        /// first record.
        /// </summary>
        public void GetNextVariantTests()
        {
            List <CalledAllele> resultVariants;
            string resultString;

            // Dispose the reader when the test is done so the vcf is not left open.
            using (var vr = new AlleleReader(VcfTestFile_1))
            {
                vr.GetNextVariants(out resultVariants, out resultString);
                Assert.Equal(@"chr1	10	.	A	.	25	PASS	DP=500	GT:GQ:AD:VF:NL:SB:NC	1/1:25:0,0:0.0000:23:0.0000:0.0010", resultString.TrimEnd('\r'));
                Assert.Equal("chr1", resultVariants[0].Chromosome);
                Assert.Equal("A", resultVariants[0].ReferenceAllele);
                Assert.Equal(".", resultVariants[0].AlternateAllele);

                //Note, we have seen the position assert below fail for specific user configurations.
                //When it fails the error message is as below:
                //Assert.Equal() Failure
                //Expected: 1428
                //Actual: 1452
                //If this happens to you, check your git attributes config file.
                //You might be handling vcf text file line endings differently, so the white space counts differently in this test.
                //In that case, the failure is purely cosmetic.
                //
                //try: Auto detect text files and perform LF normalization
                //# http://davidlaing.com/2012/09/19/customise-your-gitattributes-to-become-a-git-ninja/
                //*text = auto
                //*.cs     diff = csharp
                //*.bam binary
                //*.vcf text
                //.fa text eol = crlf

                if (vr.Position() == 1428)
                {
                    Console.WriteLine("This isn't critical, but you might want to change your line endings convention. ");
                    Console.WriteLine("This project was developed with \\CR\\LF , not \\LF convention.");
                }
                else
                {
                    Assert.Equal(1452, vr.Position());
                }

                vr.GetNextVariants(out resultVariants, out resultString);
                Assert.Equal(@"chr1	20	.	A	T	25	PASS	DP=500	GT:GQ:AD:VF:NL:SB:NC	1/1:25:0,0:0.0000:23:0.0000:0.0010", resultString.TrimEnd('\r'));
                Assert.Equal("chr1", resultVariants[0].Chromosome);

                vr.GetNextVariants(out resultVariants);
                Assert.Equal("chr1", resultVariants[0].Chromosome);
                Assert.Equal("A", resultVariants[0].ReferenceAllele);
                Assert.Equal("AT", resultVariants[0].AlternateAllele);
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Performs a Venn split between the two sample vcfs named by <c>_inputPaths[0]</c> and
        /// <c>_inputPaths[1]</c>.
        /// </summary>
        /// <remarks>
        /// Both files are walked in parallel, one position at a time. At each position the co-located
        /// variants of both pools are paired, every pair is classified and written to the Venn diagram
        /// files and, when a <c>consensusBuilder</c> is configured, combined into consensus variants that
        /// are written to the consensus file. Any exception is reported through <c>OnError</c> together
        /// with the position being processed, and then rethrown.
        /// </remarks>
        public void DoPairwiseVenn()
        {
            bool doConsensus      = (consensusBuilder != null);
            bool requireGenotypes = false;

            // Both backlogs are assigned by the first read from each file below.
            List <CalledAllele> backLogPoolAAlleles;
            List <CalledAllele> backLogPoolBAlleles;

            using (AlleleReader readerA = new AlleleReader(_inputPaths[0], requireGenotypes))
            using (AlleleReader readerB = new AlleleReader(_inputPaths[1], requireGenotypes))
            {
                var vcfA_HeaderLines     = readerA.HeaderLines;
                var chrOrderingFromInput = ChrCompare.GetChrListFromVcfHeader(vcfA_HeaderLines);
                var alleleCompareByLoci  = new AlleleCompareByLoci(chrOrderingFromInput);

                if (doConsensus)
                {
                    consensusBuilder.OpenConsensusFile(vcfA_HeaderLines);
                }

                OpenVennDiagramStreams(readerA.HeaderLines);

                //Only the chromosome and position of this allele are used: it marks the locus being assembled.
                var currentAllele = new CalledAllele();

                //read the first variant from each gvcf file...
                var backLogExistPoolA = readerA.GetNextVariants(out backLogPoolAAlleles);
                var backLogExistPoolB = readerB.GetNextVariants(out backLogPoolBAlleles);

                //keep reading and processing until we are done with both gvcfs
                while (true)
                {
                    try
                    {
                        //1) Get the next set of variants. Pull from the backlog first,
                        //choosing all the variants at the first available position.
                        var coLocatedPoolAAlleles = new List <CalledAllele>();
                        var coLocatedPoolBAlleles = new List <CalledAllele>();

                        //We need to set up which location to look at next.
                        //Choose the first one from the backlog.
                        if (backLogExistPoolA || backLogExistPoolB)
                        {
                            CalledAllele nextLocus;

                            if (backLogExistPoolA && backLogExistPoolB)
                            {
                                int orderResult = alleleCompareByLoci.OrderAlleles(
                                    backLogPoolAAlleles.First(), backLogPoolBAlleles.First());

                                //pool A is chosen only when it orders strictly first; otherwise pool B is.
                                nextLocus = (orderResult < 0)
                                    ? backLogPoolAAlleles.First()
                                    : backLogPoolBAlleles.First();
                            }
                            else if (backLogExistPoolB)
                            {
                                nextLocus = backLogPoolBAlleles.First();
                            }
                            else //only pool A has a backlog
                            {
                                nextLocus = backLogPoolAAlleles.First();
                            }

                            currentAllele.Chromosome        = nextLocus.Chromosome;
                            currentAllele.ReferencePosition = nextLocus.ReferencePosition;

                            //assemble lists of co-located variants at the position of the current variant
                            coLocatedPoolAAlleles = AssembleColocatedList(readerA, currentAllele, alleleCompareByLoci,
                                                                          ref backLogExistPoolA, ref backLogPoolAAlleles);

                            coLocatedPoolBAlleles = AssembleColocatedList(readerB, currentAllele, alleleCompareByLoci,
                                                                          ref backLogExistPoolB, ref backLogPoolBAlleles);
                        } //else, if there is nothing in either backlog, the colocated-variant lists stay empty.

                        //2) Now we have finished reading out all the co-located variants...
                        //We need to organize them into pairs, to know which allele to compare with which.
                        var             pairs                      = SelectPairs(coLocatedPoolAAlleles, coLocatedPoolBAlleles);
                        var             consensusVariants          = new List <CalledAllele>();
                        AggregateAllele lastConsensusReferenceCall = null;

                        //3) For each pair, combine them and mark if biased or not.
                        for (int pairIndex = 0; pairIndex < pairs.Count; pairIndex++)
                        {
                            var variantA = pairs[pairIndex][0];
                            var variantB = pairs[pairIndex][1];

                            var comparisonCase = GetComparisonCase(variantA, variantB);

                            //add VarA and VarB to appropriate venn diagram files.
                            WriteVarsToVennFiles(comparisonCase, variantA, variantB);

                            if (!doConsensus)
                            {
                                continue;
                            }

                            AggregateAllele consensus = consensusBuilder.CombineVariants(
                                variantA, variantB, comparisonCase);

                            //It is possible, for multiallelic sites, that a pair of variants
                            //ends up as a consensus reference, and we may already have
                            //called a reference for this locus.
                            //We might have some cleaning up to do...
                            if (consensus.Genotype == Pisces.Domain.Types.Genotype.HomozygousRef)
                            {
                                if (lastConsensusReferenceCall == null)
                                {
                                    //this is the first time we see a reference at this locus;
                                    //it is OK to fall through and add our consensus variant to the list.
                                    lastConsensusReferenceCall = consensus;
                                }
                                else
                                {
                                    //We have already called a reference variant for this locus, and it is
                                    //already in the list. Merge this reference into the earlier one
                                    //*before* it is written to file, instead of adding a second entry.

                                    //the chr, pos, ref, alt, and depth should be correct.
                                    //We'll merge the filters,
                                    //and take the max SB and PB. (where a higher value indicates worse value, so we stay conservative)
                                    lastConsensusReferenceCall.Filters = ConsensusBuilder.CombineFilters(lastConsensusReferenceCall, consensus);

                                    lastConsensusReferenceCall.StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                                    {
                                        GATKBiasScore = Math.Max(lastConsensusReferenceCall.StrandBiasResults.GATKBiasScore, consensus.StrandBiasResults.GATKBiasScore)
                                    };

                                    lastConsensusReferenceCall.PoolBiasResults = new Pisces.Domain.Models.BiasResults()
                                    {
                                        GATKBiasScore = Math.Max(lastConsensusReferenceCall.PoolBiasResults.GATKBiasScore, consensus.PoolBiasResults.GATKBiasScore)
                                    };

                                    //we are going to take the min Q and NL score, to be conservative.
                                    //Each score is merged with its own counterpart: the variant Q score
                                    //must be compared against the other call's variant Q score.
                                    lastConsensusReferenceCall.NoiseLevelApplied = Math.Min(lastConsensusReferenceCall.NoiseLevelApplied, consensus.NoiseLevelApplied);
                                    lastConsensusReferenceCall.GenotypeQscore    = Math.Min(lastConsensusReferenceCall.GenotypeQscore, consensus.GenotypeQscore);
                                    lastConsensusReferenceCall.VariantQscore     = Math.Min(lastConsensusReferenceCall.VariantQscore, consensus.VariantQscore);

                                    continue;
                                }
                            }

                            consensusVariants.Add(consensus);
                        }

                        //4) Write out the results to file. (this will be a list of co-located variants)
                        if (doConsensus)
                        {
                            consensusBuilder.WriteConsensusVariantsToFile(consensusVariants);
                        }

                        //we assembled everyone and no one is left.
                        bool backLogsCleared = (backLogPoolAAlleles == null) && (backLogPoolBAlleles == null);

                        //Also stop when neither pool has a backlog: the next pass would read nothing
                        //and change nothing. This matters when neither file yields a single variant,
                        //because then the backlog lists are never reset to null by the assembly step
                        //and the check above alone could spin forever.
                        //NOTE(review): assumes the reader may leave a non-null list on a failed read - confirm.
                        bool nothingQueued = !backLogExistPoolA && !backLogExistPoolB;

                        if (backLogsCleared || nothingQueued)
                        {
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        OnError(string.Format("Fatal error encountered comparing paired sample vcfs; Check {0}, position {1}.  Exception: {2}",
                                              currentAllele.Chromosome, currentAllele.ReferencePosition, ex));
                        throw;
                    }
                } //close assemble list
            } //close usings

            if (doConsensus)
            {
                consensusBuilder.CloseConsensusFile();
            }

            CloseVennDiagramStreams();
        }