Beispiel #1
0
        /// <summary>
        /// Steps forward with the reader and gathers every allele that sits at the same
        /// position as <paramref name="CurrentVariant"/>.
        /// </summary>
        /// <param name="Reader">Source of the upcoming vcf records.</param>
        /// <param name="CurrentVariant">Allele whose position is being collected.</param>
        /// <param name="mFirst">Passed straight through to <c>AlleleCompareByLoci.OrderAlleles</c>.</param>
        /// <param name="BackLogExists">
        /// On entry: true if <paramref name="TheBackLog"/> holds alleles that were read but not yet consumed.
        /// On exit: true if a later-positioned record was read and parked in the backlog.
        /// </param>
        /// <param name="TheBackLog">
        /// Alleles carried over between calls. Set to null on exit when no backlog remains.
        /// </param>
        /// <returns>The alleles found to be co-located with <paramref name="CurrentVariant"/> (possibly empty).</returns>
        /// <exception cref="InvalidDataException">The comparison result is neither 0 nor -1.</exception>
        private static List <CalledAllele> AssembleColocatedList(
            VcfReader Reader, CalledAllele CurrentVariant, bool mFirst,
            ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
        {
            var colocated = new List <CalledAllele>();

            while (true)
            {
                List <CalledAllele> candidates;

                if (BackLogExists)
                {
                    // Consume what the previous call left behind before touching the reader.
                    candidates    = TheBackLog;
                    BackLogExists = false;
                }
                else
                {
                    var rawRecord = new VcfVariant();

                    if (!Reader.GetNextVariant(rawRecord))
                    {
                        // Reader is exhausted.
                        break;
                    }

                    var singleRecord = new List <VcfVariant> {
                        rawRecord
                    };
                    candidates = VcfVariantUtilities.Convert(singleRecord).ToList();
                }

                // -1 if CurrentVariant comes first, 0 if co-located.
                int order = AlleleCompareByLoci.OrderAlleles(CurrentVariant, candidates.First(), mFirst);

                if (order == 0)
                {
                    // The record we just got is at our current position: keep it and read on.
                    colocated.AddRange(candidates);
                    continue;
                }

                if (order != -1)
                {
                    throw new InvalidDataException("Vcf needs to be ordered.");
                }

                // The record we just got is after our current position: park it for the next call.
                TheBackLog    = candidates;
                BackLogExists = true;
                break;
            }

            if (!BackLogExists)
            {
                TheBackLog = null;
            }

            return colocated;
        }
Beispiel #2
0
 /// <summary>
 /// Exercises VcfVariantUtilities.CompareSubstring against the fixed string "ABC"
 /// at two different start offsets.
 /// </summary>
 public void CompareSubstring()
 {
     const string target = "ABC";

     // Cases expected to report a match.
     Assert.True(VcfVariantUtilities.CompareSubstring("ABC", target, 0));
     Assert.True(VcfVariantUtilities.CompareSubstring("C", target, 2));

     // Cases expected to report a mismatch.
     Assert.False(VcfVariantUtilities.CompareSubstring("ABD", target, 0));
     Assert.False(VcfVariantUtilities.CompareSubstring("ABD", target, 2));
 }
 /// <summary>
 /// Creates a VQR vcf writer that remembers the header of the input vcf.
 /// </summary>
 /// <param name="outputFilePath">Forwarded to the base writer.</param>
 /// <param name="config">Writer configuration; also used to build the formatter.</param>
 /// <param name="context">Forwarded to the base writer.</param>
 /// <param name="originalHeader">Header lines of the input vcf; its filter lines are indexed by filter type.</param>
 /// <param name="phasingCommandLine">Command line string stored as the VQR command line.</param>
 /// <param name="bufferLimit">Forwarded to the base writer.</param>
 public VQRVcfWriter(
     string outputFilePath,
     VcfWriterConfig config,
     VcfWriterInputContext context,
     List <string> originalHeader,
     string phasingCommandLine,
     int bufferLimit = 2000)
     : base(outputFilePath, config, context, bufferLimit)
 {
     // Header bookkeeping.
     _originalHeader = originalHeader;
     _originalFilterLines = VcfVariantUtilities.GetFilterStringsByType(originalHeader);

     // Formatting behaviour follows the writer config.
     _formatter = new VcfFormatter(config);
     AllowMultipleVcfLinesPerLoci = config.AllowMultipleVcfLinesPerLoci;

     _vqrCommandLine = phasingCommandLine;
 }
Beispiel #4
0
 /// <summary>
 /// Creates a VennVcf writer that remembers the header of the input vcf.
 /// </summary>
 /// <param name="outputFilePath">Forwarded to the base writer.</param>
 /// <param name="config">Writer configuration; also used to build the formatter.</param>
 /// <param name="context">Forwarded to the base writer.</param>
 /// <param name="originalHeader">Header lines of the input vcf; its filter lines are indexed by filter type.</param>
 /// <param name="vennVcfCommandLine">Command line string stored as the VennVcf command line.</param>
 /// <param name="bufferLimit">Forwarded to the base writer.</param>
 /// <param name="debugMode">Forwarded to the VennVcf formatter.</param>
 public VennVcfWriter(
     string outputFilePath,
     VcfWriterConfig config,
     VcfWriterInputContext context,
     List <string> originalHeader,
     string vennVcfCommandLine,
     int bufferLimit = 2000,
     bool debugMode = false)
     : base(outputFilePath, config, context, bufferLimit)
 {
     // Header bookkeeping.
     _originalHeader = originalHeader;
     _originalFilterLines = VcfVariantUtilities.GetFilterStringsByType(originalHeader);

     // Formatting behaviour follows the writer config.
     _formatter = new VennVcfFormatter(config, debugMode);
     AllowMultipleVcfLinesPerLoci = config.AllowMultipleVcfLinesPerLoci;

     _vennCommandLine = vennVcfCommandLine;
 }
Beispiel #5
0
        /// <summary>
        /// Runs both IsRMxN overloads on <paramref name="filter"/> and checks the parsed
        /// M and N values as well as the yes/no answer of each overload.
        /// </summary>
        /// <param name="filter">Filter string under test.</param>
        /// <param name="expectedM">Value the out-parameter M should carry.</param>
        /// <param name="expectedN">Value the out-parameter N should carry.</param>
        /// <param name="expectedIsRMxN">Answer both overloads should give.</param>
        private static void CheckRMxN(string filter, int expectedM, int expectedN, bool expectedIsRMxN)
        {
            int parsedM;
            int parsedN;

            var answerWithoutCounts = VcfVariantUtilities.IsRMxN(filter);
            var answerWithCounts    = VcfVariantUtilities.IsRMxN(filter, out parsedM, out parsedN);

            // Parsed values first, then the two boolean answers.
            Assert.Equal(expectedM, parsedM);
            Assert.Equal(expectedN, parsedN);
            Assert.Equal(expectedIsRMxN, answerWithoutCounts);
            Assert.Equal(expectedIsRMxN, answerWithCounts);
        }
        /// <summary>
        /// Asserts that a converted allele carries the same reference/alternate alleles,
        /// chromosome, position and genotype as the vcf record it should correspond to.
        /// </summary>
        /// <param name="baseline">The vcf record; must list exactly one variant allele.</param>
        /// <param name="test">The allele to compare against the record.</param>
        public static void CheckVariantsMatch(VcfVariant baseline, CalledAllele test)
        {
            // Verify the allele count before indexing into the array, so that an unexpected
            // record fails with an assertion message rather than an index exception.
            Assert.Equal(1, baseline.VariantAlleles.Length);

            Assert.Equal(baseline.ReferenceAllele, test.ReferenceAllele);
            Assert.Equal(baseline.VariantAlleles[0], test.AlternateAllele);
            Assert.Equal(baseline.ReferenceName, test.Chromosome);
            Assert.Equal(baseline.ReferencePosition, test.ReferencePosition);

            // A "." alternate means the record carries no alt allele at all.
            int numAlts = (baseline.VariantAlleles[0] == ".") ? 0 : baseline.VariantAlleles.Length;

            Assert.Equal(VcfVariantUtilities.MapGTString(baseline.Genotypes[0]["GT"], numAlts), test.Genotype);
        }
Beispiel #7
0
        /// <summary>
        /// Reads one record from the variant source and converts it to alleles.
        /// </summary>
        /// <returns>
        /// The alleles converted from the next record, or an empty list when the source
        /// has no more records.
        /// </returns>
        private IEnumerable <CalledAllele> GetNextBlockOfOriginalAllelesFromVcfVar()
        {
            var nextRecord = new VcfVariant();

            if (_variantSource.GetNextVariant(nextRecord))
            {
                var singleRecord = new List <VcfVariant> {
                    nextRecord
                };

                return VcfVariantUtilities.Convert(singleRecord);
            }

            // Nothing left to read.
            return new List <CalledAllele>();
        }
Beispiel #8
0
        /// <summary>
        /// Table-driven check of VcfVariantUtilities.GetNumTrailingAgreement: each row gives
        /// the expected count for a pair of input strings.
        /// </summary>
        public void TestGetNumTrailingAgreement()
        {
            var cases = new[]
            {
                new { Expected = 0, First = "ACGT", Second = "CGTA" },
                new { Expected = 1, First = "AGT", Second = "CGTAT" },
                new { Expected = 2, First = "ACGTGGG", Second = "CGTAGG" },
                new { Expected = 3, First = "AAAA", Second = "AAA" },
                new { Expected = 3, First = "AAA", Second = "AAAA" },
                new { Expected = 4, First = "ACGT", Second = "ACGT" },
                new { Expected = 0, First = "TAAAC", Second = "CAAAAT" },

                new { Expected = 0, First = "TAATA", Second = "TAATC" },
                new { Expected = 2, First = "TAATA", Second = "TACTA" },
                new { Expected = 3, First = "TACGTG", Second = "TATGTG" },
                new { Expected = 3, First = "TACGTG", Second = "TAGTG" },
                new { Expected = 3, First = "TAGTG", Second = "TACGTG" },
            };

            foreach (var testCase in cases)
            {
                Assert.Equal(testCase.Expected, VcfVariantUtilities.GetNumTrailingAgreement(testCase.First, testCase.Second));
            }
        }
Beispiel #9
0
        /// <summary>
        /// Checks the genotype that VcfVariantUtilities.MapGTString returns for a set of
        /// GT strings and alt-allele counts, then checks the size of the Genotype enum
        /// so that a newly added genotype forces this test to be revisited.
        /// </summary>
        public void MapGT()
        {
            var numTestsNeeded = 13;

            Assert.Equal(Genotype.Alt12LikeNoCall, VcfVariantUtilities.MapGTString("./.", 2));
            Assert.Equal(Genotype.AltAndNoCall, VcfVariantUtilities.MapGTString("1/.", 1));
            Assert.Equal(Genotype.AltLikeNoCall, VcfVariantUtilities.MapGTString("./.", 1));
            Assert.Equal(Genotype.HeterozygousAlt1Alt2, VcfVariantUtilities.MapGTString("1/2", 2));
            Assert.Equal(Genotype.HeterozygousAltRef, VcfVariantUtilities.MapGTString("0/1", 1));
            Assert.Equal(Genotype.HomozygousAlt, VcfVariantUtilities.MapGTString("1/1", 1));
            Assert.Equal(Genotype.HomozygousRef, VcfVariantUtilities.MapGTString("0/0", 0));
            Assert.Equal(Genotype.RefAndNoCall, VcfVariantUtilities.MapGTString("0/.", 0));
            Assert.Equal(Genotype.RefLikeNoCall, VcfVariantUtilities.MapGTString("./.", 0));
            Assert.Equal(Genotype.HemizygousAlt, VcfVariantUtilities.MapGTString("1", 1));
            Assert.Equal(Genotype.HemizygousRef, VcfVariantUtilities.MapGTString("0", 1));
            Assert.Equal(Genotype.HemizygousNoCall, VcfVariantUtilities.MapGTString(".", 1));

            // Sanity check that we covered all the possibilities. The hard-coded count is the
            // expectation, so it goes first; that way a failure reports the enum size as "actual".
            // NOTE(review): 12 mappings are asserted above against an expected 13 enum values;
            // presumably one genotype cannot be produced from a GT string - confirm.
            Assert.Equal(numTestsNeeded, Enum.GetValues(typeof(Genotype)).Length);
        }
        /// <summary>
        /// There is currently one filter that VQR can add: q{N}, for low variant quality scores.
        /// This makes sure the matching ##FILTER line is in the header if the config requires it.
        /// Lines are inserted directly after the last existing ##FILTER line; a VQR line is
        /// added when the header lacks that filter or describes it with a different string.
        /// </summary>
        public void AdjustHeaderLines()
        {
            var headerFiltersByType = VcfVariantUtilities.GetFilterStringsByType(_originalHeader);
            var candidateFilters    = _formatter.GenerateFilterStringsByType();

            // Pisces might have used these, but VQR (currently) never does,
            // so they are only in the header if Pisces already put them there.
            candidateFilters.Remove(FilterType.RMxN);
            candidateFilters.Remove(FilterType.IndelRepeatLength);
            candidateFilters.Remove(FilterType.NoCall);

            int insertAfter = _originalHeader.FindLastIndex(line => line.Contains("##FILTER"));

            if (insertAfter == -1)
            {
                // No filter lines at all: fall back to a position relative to the end of the header.
                insertAfter = Math.Max(_originalHeader.Count - 2, -1);
            }

            foreach (var entry in candidateFilters)
            {
                bool alreadyListed = headerFiltersByType.ContainsKey(entry.Key);

                // Be gentle about line endings when deciding whether the existing line is the same.
                if (alreadyListed && entry.Value.Trim() == headerFiltersByType[entry.Key].Trim())
                {
                    continue;
                }

                // Either the filter is new, or it is listed with a different description.
                insertAfter++;
                _originalHeader.Insert(insertAfter, entry.Value.Replace("\">", ", by VQR\">"));
            }
        }
Beispiel #11
0
        /// <summary>
        /// Rewrites a vcf next to the input file, either "crushed" (multiple vcf lines per
        /// locus not allowed) or "uncrushed" (multiple vcf lines per locus allowed).
        /// The output name is the input name with ".vcf" replaced by ".crushed.vcf" or
        /// ".uncrushed.vcf"; an existing output file is deleted first.
        /// </summary>
        /// <param name="inputFile">Path of the vcf to read. Must contain ".vcf".</param>
        /// <param name="crush">True to write the crushed form, false for the uncrushed form.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="inputFile"/> does not contain ".vcf", so the output path would be the input itself.
        /// </exception>
        public static void DoReformating(string inputFile, bool crush)
        {
            var outputFile = inputFile.Replace(".vcf", ".uncrushed.vcf");

            if (crush)
            {
                Console.WriteLine("crushing " + inputFile + "...");
                outputFile = inputFile.Replace(".vcf", ".crushed.vcf");
            }
            else
            {
                Console.WriteLine("uncrushing " + inputFile + "...");
            }

            // If nothing was replaced, the "output" is the input file and the delete below
            // would destroy it before it is ever read.
            if (outputFile == inputFile)
            {
                throw new ArgumentException("Input file name must contain \".vcf\": " + inputFile, "inputFile");
            }

            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            var config = new VcfWriterConfig()
            {
                AllowMultipleVcfLinesPerLoci = !crush
            };

            using (VcfFileWriter writer = new VcfFileWriter(outputFile, config, new VcfWriterInputContext()))
            {
                writer.WriteHeader();

                using (VcfReader reader = new VcfReader(inputFile, false))
                {
                    var backLogVcfVariant = new VcfVariant();
                    var backLogExists     = reader.GetNextVariant(backLogVcfVariant);

                    while (backLogExists)
                    {
                        // Convert before reading on: the reader refills the same VcfVariant instance.
                        var backLogAlleles = VcfVariantUtilities.Convert(new List <VcfVariant> {
                            backLogVcfVariant
                        }).ToList();

                        foreach (var allele in backLogAlleles)
                        {
                            try
                            {
                                writer.Write(new List <CalledAllele>()
                                {
                                    allele
                                });
                            }
                            catch (Exception ex)
                            {
                                // Best effort: report the problem and stop reformatting.
                                Console.WriteLine("Problem writing " + allele.ToString());
                                Console.WriteLine("Exception: " + ex);
                                return;
                            }
                        }

                        backLogExists = reader.GetNextVariant(backLogVcfVariant);

                        if ((backLogAlleles.Count > 0) &&
                            (backLogAlleles[0].Chromosome != backLogVcfVariant.ReferenceName))
                        {
                            //we have switched to the next chr. flush the buffer.
                            writer.FlushBuffer();
                        }
                    }

                    writer.FlushBuffer();
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Builds one allele out of a cluster of variant sites and reports which reference
        /// positions were absorbed ("sucked up") into that allele.
        /// Warning #1. This algorithm has an inherent assumption:
        /// the variant sites must be in order of their true position (first base of difference).
        /// That's not always how they appeared in the vcf.
        /// Warning #2. Variants are typically reported in the VCF on their first base of difference
        /// from the reference genome (or in the case of indels, one base before).
        /// However, in germline (crushed) formatting, Scylla reports all variants in a nbhd
        /// at the same (anchored) position. This is because there can only ever be two alleles
        /// given the diploid assumption. So, you can't report 5 different alleles at 5 spots within the neighborhood.
        /// IE, somatic genotyping/reporting is loci-specific,
        /// but diploid genotyping/reporting is forced to be consistent through the whole neighborhood.
        /// </summary>
        /// <param name="allele"> allele we are going to create from the cluster</param>
        /// <param name="clusterVariantSites">the variant site results for the cluster</param>
        /// <param name="referenceSequence">the reference sequence, so we can populate in between the MNVs</param>
        /// <param name="neighborhoodDepthAtSites">depths needed to populate the new allele (averaged over the sites used)</param>
        /// <param name="neighborhoodNoCallsAtSites">no-call counts needed to populate the new allele (averaged over the sites used)</param>
        /// <param name="clusterRefSupport">reference support, passed through to the created allele</param>
        /// <param name="clusterCountsAtSites">call counts needed to populate the new allele (averaged over the sites used)</param>
        /// <param name="chromosome">chr needed to populate the new allele</param>
        /// <param name="qNoiselevel">NL needed to populate the new allele</param>
        /// <param name="maxQscore">Q max needed to determine Q score of the new allele</param>
        /// <param name="anchorPosition">if we are forcing the allele to be at a given position, instead of the position it would naturally be at in the VCF file. -1 means "not anchored".</param>
        /// <returns>
        /// The absorbed reference positions, each mapped to a record holding the allele's
        /// variant count and the allele that claimed the position. Empty when no allele could be built.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The variant site, depth and count arrays do not all have the same length.
        /// </exception>
        public static Dictionary <int, SuckedUpRefRecord> Extract(out CalledAllele allele,
                                                                  VariantSite[] clusterVariantSites, string referenceSequence, int[] neighborhoodDepthAtSites, int[] neighborhoodNoCallsAtSites, int clusterRefSupport,
                                                                  int[] clusterCountsAtSites, string chromosome, int qNoiselevel, int maxQscore, int anchorPosition = -1)
        {
            // NOTE(review): neighborhoodNoCallsAtSites is indexed by siteIndex further down but its
            // length is not part of this check, and clusterVariantSites[0] is read below without
            // an empty-array check - confirm callers guarantee both.
            if (clusterVariantSites.Length != neighborhoodDepthAtSites.Length || neighborhoodDepthAtSites.Length != clusterCountsAtSites.Length)
            {
                throw new InvalidDataException("Variant sites, depths, and counts arrays are different lengths.");
            }

            var referenceRemoval = new Dictionary <int, SuckedUpRefRecord>();

            // Initialize items we'll eventually use to build a variant.
            var alleleReference = "";
            var alleleAlternate = "";
            var totalCoverage   = 0;
            var varCount        = 0;
            var noCallCount     = 0;

            // Initialize trackers
            var referenceCallsSuckedIntoMnv = new List <int>();
            var nocallsInsideMnv            = new List <int>();
            var depthsInsideMnv             = new List <int>();
            var countsInsideMnv             = new List <int>();

            var lastRefBaseSitePosition  = clusterVariantSites[0].VcfReferencePosition;
            var firstVariantSitePosition = clusterVariantSites[0].VcfReferencePosition;
            var differenceStarted        = false;

            bool usingAnchor = (anchorPosition != -1);

            if (usingAnchor)
            {
                // Pretend the last reference base consumed sits just before the anchor, so the gap
                // filling below starts at the anchor itself.
                lastRefBaseSitePosition = anchorPosition - 1;
            }

            // Walk through the cluster's variant sites and build up ref/alt strings, average support, and average coverage
            for (var siteIndex = 0; siteIndex < clusterVariantSites.Length; siteIndex++)
            {
                var consensusSite = clusterVariantSites[siteIndex];

                var refAlleleToAdd  = consensusSite.TrueRefAllele;
                var altAlleleToAdd  = consensusSite.TrueAltAllele;
                var currentPosition = consensusSite.TrueFirstBaseOfDiff;
                // diff >= 0 means this site starts at or before the last reference base already consumed.
                var diff            = lastRefBaseSitePosition - currentPosition;

                // no variant here...
                if (refAlleleToAdd == altAlleleToAdd)
                {
                    continue;
                }

                if (differenceStarted && (diff >= 0))
                {
                    //We have a problem. The last site we added overlaps with the current site we want to add.
                    //They are probably not in conflict. But we will have to do some kind of substring to get this right..

                    var lengthToTrimFromStart = diff + 1;

                    if ((lengthToTrimFromStart < consensusSite.TrueAltAllele.Length) &&
                        (lengthToTrimFromStart < consensusSite.TrueRefAllele.Length))
                    {
                        // Keep only the part of this site that extends past what was already added.
                        refAlleleToAdd  = consensusSite.TrueRefAllele.Substring(lengthToTrimFromStart);
                        altAlleleToAdd  = consensusSite.TrueAltAllele.Substring(lengthToTrimFromStart);
                        currentPosition = consensusSite.TrueFirstBaseOfDiff + lengthToTrimFromStart;
                    }
                    else
                    {
                        continue; //if the last variant site entirely covered this one, just don't worry about it.
                    }
                }



                // Nima: In diploid mode (usingAnchor == 1), any ref after anchor gets used up. I'm not sure if this is intended.
                // TJD - yes, intended.  For germline we phase a whole anchored block at a time, including reference
                if (differenceStarted || usingAnchor)
                {
                    // Every position strictly between the last consumed reference base and this site
                    // is recorded as absorbed, and the same filler is appended to both ref and alt.
                    var gapLength            = currentPosition - lastRefBaseSitePosition - 1;
                    var suckedUpRefPositions = new List <int>();
                    for (var i = 0; i < gapLength; i++)
                    {
                        var refPosition = lastRefBaseSitePosition + i + 1;
                        suckedUpRefPositions.Add(refPosition);
                    }
                    referenceCallsSuckedIntoMnv.AddRange(suckedUpRefPositions);

                    var gapFiller = FillGapWithReferenceData(referenceSequence,
                                                             clusterVariantSites[0], suckedUpRefPositions);

                    alleleReference += gapFiller;
                    alleleAlternate += gapFiller;
                }

                if (!differenceStarted)
                {
                    firstVariantSitePosition = currentPosition;
                }

                differenceStarted = true;
                depthsInsideMnv.Add(neighborhoodDepthAtSites[siteIndex]);
                countsInsideMnv.Add(clusterCountsAtSites[siteIndex]);
                nocallsInsideMnv.Add(neighborhoodNoCallsAtSites[siteIndex]);

                //this takes into account taking deletions out of the ref allele.
                lastRefBaseSitePosition = currentPosition + refAlleleToAdd.Length - 1;

                alleleReference += refAlleleToAdd;
                alleleAlternate += altAlleleToAdd;
            }



            if (differenceStarted)
            {
                //remove any trailing bases of agreement.
                var numTrailingBasesOfAgreement = VcfVariantUtilities.GetNumTrailingAgreement(alleleReference, alleleAlternate);

                //remove trailing bases
                alleleReference = alleleReference.Substring(0, alleleReference.Length - numTrailingBasesOfAgreement);
                alleleAlternate = alleleAlternate.Substring(0, alleleAlternate.Length - numTrailingBasesOfAgreement);
            }
            //if we are not anchored, we trim off preceding bases of agreement, and move up the coordinate to
            //the first base of difference.
            var numPrecedingBasesOfAgreement = usingAnchor ? 0 : VcfVariantUtilities.GetNumPrecedingAgreement(alleleReference, alleleAlternate);

            alleleReference = alleleReference.Substring(numPrecedingBasesOfAgreement,
                                                        alleleReference.Length - numPrecedingBasesOfAgreement);
            alleleAlternate = alleleAlternate.Substring(numPrecedingBasesOfAgreement,
                                                        alleleAlternate.Length - numPrecedingBasesOfAgreement);



            // && binds tighter than ||: bail out if no difference was ever found, OR if both
            // allele strings ended up empty.
            if (!differenceStarted || (alleleReference.Length == 0) && (alleleAlternate.Length == 0))
            {
                //taking out the preceding bases, the phased variant compacted to nothing!
                allele = Create(chromosome, -1, alleleReference, alleleAlternate, varCount, noCallCount, totalCoverage, clusterRefSupport, AlleleCategory.Reference, qNoiselevel, maxQscore);
                return(referenceRemoval);
            }

            // take average counts and depth through MNV
            // the only "holes" that lower these counts are Ns
            totalCoverage = depthsInsideMnv.Any() ? (int)depthsInsideMnv.Average() : 0;
            varCount      = countsInsideMnv.Any() ? (int)countsInsideMnv.Average() : 0;
            noCallCount   = nocallsInsideMnv.Any() ? (int)nocallsInsideMnv.Average() : 0;

            var trueStartPosition = usingAnchor ? anchorPosition : firstVariantSitePosition + numPrecedingBasesOfAgreement;

            // Base immediately before the allele, used as the padding base for indels.
            // NOTE(review): this indexing presumes referenceSequence[0] corresponds to the first
            // site's VcfReferencePosition - confirm. "R" is the placeholder when the index falls
            // outside referenceSequence.
            var indexIntoRef    = (trueStartPosition - 1) - clusterVariantSites[0].VcfReferencePosition;
            var prependableBase = "R";

            if ((indexIntoRef >= 0) && (indexIntoRef < referenceSequence.Length))
            {
                prependableBase = referenceSequence[indexIntoRef].ToString();
            }

            //compacted to an insertion
            if ((alleleReference.Length == 0) && (alleleAlternate.Length != 0))
            {
                allele = Create(chromosome, trueStartPosition - 1, prependableBase + alleleReference, prependableBase + alleleAlternate,
                                varCount, noCallCount, totalCoverage, clusterRefSupport, AlleleCategory.Insertion, qNoiselevel, maxQscore);
            }
            //compacted to a deletion
            else if ((alleleReference.Length != 0) && (alleleAlternate.Length == 0))
            {
                allele = Create(chromosome, trueStartPosition - 1, prependableBase + alleleReference, prependableBase + alleleAlternate,
                                varCount, noCallCount, totalCoverage, clusterRefSupport, AlleleCategory.Deletion, qNoiselevel, maxQscore);
            }
            else  //MNV, pretty much what we were expecting. (and every time we are using an anchor)
            {
                allele = Create(chromosome, trueStartPosition, alleleReference, alleleAlternate,
                                varCount, noCallCount, totalCoverage, clusterRefSupport, AlleleCategory.Mnv, qNoiselevel, maxQscore);
            }


            // With no variant support at all, the allele built above is replaced by a reference
            // call (alternate ".") at the same start position.
            if (varCount == 0)
            {
                allele = Create(chromosome, trueStartPosition, alleleReference, ".",
                                varCount, noCallCount, totalCoverage, clusterRefSupport, AlleleCategory.Reference, qNoiselevel, maxQscore);
            }

            // Report the absorbed reference positions. When not anchored, only positions after
            // the allele's start position are reported.
            foreach (var suckedupRefPos in referenceCallsSuckedIntoMnv)
            {
                if ((usingAnchor) || (suckedupRefPos > trueStartPosition))
                {
                    var suckedUpRefRecord = new SuckedUpRefRecord()
                    {
                        Counts = varCount, AlleleThatClaimedIt = allele
                    };
                    referenceRemoval.Add(suckedupRefPos, suckedUpRefRecord);
                }
            }

            return(referenceRemoval);
        }
Beispiel #13
0
        /// <summary>
        /// Streams the input vcf through the geometric filter and writes the result to
        /// "*.filtered.vcf" (or "*.filtered.genome.vcf") in the output directory.
        /// Records are read one at a time, grouped while they are co-located, and each group
        /// is filtered and written as soon as a record from a different location shows up.
        /// </summary>
        /// <param name="settings">Supplies the input vcf, output directory, filter parameters and command line.</param>
        public static void DoFiltering(PsaraOptions settings)
        {
            var geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
            //maybe expand to add other filters..

            var inputVcfPath = settings.InputVcf;
            var inputVcfName = Path.GetFileName(inputVcfPath);
            var filteredName = inputVcfName.Replace(".vcf", ".filtered.vcf");

            // Keep ".genome" as the last tag in front of the extension.
            var outputFile = Path.Combine(settings.OutputDirectory, filteredName)
                                 .Replace(".genome.filtered.vcf", ".filtered.genome.vcf");

            Logger.WriteToLog("filtering " + inputVcfPath + "...");

            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            List <string>   headerLines = VcfReader.GetAllHeaderLines(inputVcfPath);
            string          commandLine = "##Psara_cmdline=" + settings.QuotedCommandLineArgumentsString;
            VcfWriterConfig config      = GetWriterConfigToMatchInputVcf(inputVcfPath);

            using (PsaraVcfWriter writer = new PsaraVcfWriter(outputFile, config, new VcfWriterInputContext(), headerLines, commandLine))
            {
                writer.WriteHeader();

                using (VcfReader reader = new VcfReader(inputVcfPath, false))
                {
                    var pendingRecord   = new VcfVariant();
                    var currentGroup    = new List <CalledAllele>();
                    var morePending     = reader.GetNextVariant(pendingRecord);
                    var incomingAlleles = new List <CalledAllele>();

                    while (morePending)
                    {
                        if (incomingAlleles.Count == 0)
                        {
                            // Convert the record in hand, then advance the reader
                            // (which refills the same VcfVariant instance).
                            var singleRecord = new List <VcfVariant> {
                                pendingRecord
                            };

                            incomingAlleles = VcfVariantUtilities.Convert(
                                singleRecord, config.ShouldOutputRcCounts, config.ShouldOutputTsCounts, false).ToList();
                            morePending = reader.GetNextVariant(pendingRecord);
                        }

                        bool groupIsComplete = (currentGroup.Count != 0) && !AreColocated(currentGroup, incomingAlleles);

                        if (groupIsComplete)
                        {
                            FilterAndStreamOut(currentGroup, writer, geometricFilter);
                            currentGroup.Clear();

                            //incoming alleles are left behind for the next pass
                        }
                        else
                        {
                            currentGroup.AddRange(incomingAlleles);
                            incomingAlleles.Clear();

                            //the group is left behind, waiting for more co-located alleles
                        }
                    }

                    //if you get here, there are no more unprocessed vcf records, but there could be
                    //a group or an incoming batch of alleles left over. Write them out before exiting.

                    FilterAndStreamOut(currentGroup, writer, geometricFilter);

                    FilterAndStreamOut(incomingAlleles, writer, geometricFilter);
                }
            }
        }
        public void VennVcf_CombineTwoPoolVariants_RulesAthroughD_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S7.genome.vcf");
            List <CalledAllele> PoolAVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolA)).ToList();

            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S8.genome.vcf");
            List <CalledAllele> PoolBVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolB)).ToList();

            CalledAllele VariantA = PoolAVariants[0];
            CalledAllele VariantB = PoolBVariants[0];

            List <CalledAllele[]> pairs = VennProcessor.SelectPairs(
                new List <CalledAllele>()
            {
                VariantA
            },
                new List <CalledAllele>
            {
                VariantB
            });

            VariantComparisonCase ComparisonCase   = VennProcessor.GetComparisonCase(pairs[0][0], pairs[0][1]);
            ConsensusBuilder      consensusBuilder = new ConsensusBuilder("", parameters);
            CalledAllele          Consensus        = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            //Rule "A" test
            //A	if combined VF<1% and less than 2.6% in each pool, call REF
            //(note, we were Alt in one pool and ref in another)

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantA.Frequency, 0.9979, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0173, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(Consensus.Frequency, 0.9907, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will NOT added by post-processing b/c is ref call

            //B	if combined VF<1% and more than 2.6% in one pool, call NO CALL

            VariantA = PoolAVariants[1];
            VariantB = PoolBVariants[1];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0776, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9989, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0070, 4);
            Assert.Equal(Consensus.VariantQscore, 0);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType>
            {
                Pisces.Domain.Types.FilterType.PoolBias
            });                                          //<-low VF tag will also get added by post-processing

            //Rule "Ca" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and less than 1% in the other, call NO CALL w/PB

            VariantA = PoolAVariants[2];
            VariantB = PoolBVariants[2];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0367, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9976, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0117, 4);
            Assert.Equal(Consensus.VariantQscore, 23);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
                Pisces.Domain.Types.FilterType.PoolBias
            });
            //Rule "Cb" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and between 1% and 2.6% in the other, call NO CALL w/ no PB

            VariantA = PoolAVariants[3];
            VariantB = PoolBVariants[3];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.01725, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.03667, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.02347, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get added by post-processing

            //Rule "D" test
            //D	if combined VF>=2.6% call VARIANT (PB if only present in one pool, using 1% as the cutoff)

            VariantA = PoolAVariants[4];
            VariantB = PoolBVariants[4];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.2509, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0367, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(Consensus.Frequency, 0.1716, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get set by post processor
        }
Beispiel #15
0
        /// <summary>
        /// Performs a Venn split between the two input vcfs (<c>_inputPaths[0]</c> is treated as pool A,
        /// <c>_inputPaths[1]</c> as pool B). Both files are walked in step: at each position the co-located
        /// alleles from each pool are gathered, paired up, classified with <c>GetComparisonCase</c> and written
        /// to the Venn diagram streams. When a consensus builder is configured, each pair is also combined into
        /// a consensus call and the consensus calls for the position are written to the consensus file.
        /// </summary>
        /// <param name="mFirst">
        /// Forwarded unchanged to <c>AlleleCompareByLoci.OrderAlleles</c> and <c>AssembleColocatedList</c>.
        /// NOTE(review): presumably tells the comparer whether chrM is ordered first in the inputs - confirm.
        /// </param>
        public void DoPairwiseVenn(bool mFirst)
        {
            bool doConsensus      = (consensusBuilder != null);
            bool requireGenotypes = false;

            using (VcfReader ReaderA = new VcfReader(_inputPaths[0], requireGenotypes))
                using (VcfReader ReaderB = new VcfReader(_inputPaths[1], requireGenotypes))
                {
                    if (doConsensus)
                    {
                        consensusBuilder.OpenConsensusFile(ReaderA.HeaderLines);
                    }

                    OpenVennDiagramStreams(ReaderA.HeaderLines);

                    //read the first variant from each gvcf file...
                    var currentAllele          = new CalledAllele();
                    var backLogPoolAVcfVariant = new VcfVariant();
                    var backLogPoolBVcfVariant = new VcfVariant();

                    var backLogExistPoolA = ReaderA.GetNextVariant(backLogPoolAVcfVariant);
                    var backLogExistPoolB = ReaderB.GetNextVariant(backLogPoolBVcfVariant);

                    //a null backlog means "nothing read for this pool"; the loop exit test below relies on that.
                    var backLogPoolAAlleles = backLogExistPoolA ? VcfVariantUtilities.Convert(new List <VcfVariant> {
                        backLogPoolAVcfVariant
                    }).ToList() : null;
                    var backLogPoolBAlleles = backLogExistPoolB ? VcfVariantUtilities.Convert(new List <VcfVariant> {
                        backLogPoolBVcfVariant
                    }).ToList() : null;

                    //keep reading and processing until we are done with both gvcfs
                    while (true)
                    {
                        try
                        {
                            //1) Get the next set of variants. Pull from the backlog first,
                            //choosing all the variants at the first available position.
                            var coLocatedPoolAAlleles = new List <CalledAllele>();
                            var coLocatedPoolBAlleles = new List <CalledAllele>();

                            //We need to set up which location to look at next.
                            //Choose the first one from the backlog.

                            if (backLogExistPoolA || backLogExistPoolB)
                            {
                                if (backLogExistPoolA && backLogExistPoolB)
                                {
                                    //both pools have something pending: start from whichever one orders first.
                                    int OrderResult = AlleleCompareByLoci.OrderAlleles(
                                        backLogPoolAAlleles.First(), backLogPoolBAlleles.First(), mFirst);
                                    if (OrderResult < 0)
                                    {
                                        currentAllele.Chromosome        = backLogPoolAAlleles.First().Chromosome;
                                        currentAllele.ReferencePosition = backLogPoolAAlleles.First().ReferencePosition;
                                    }
                                    else
                                    {
                                        currentAllele.Chromosome        = backLogPoolBAlleles.First().Chromosome;
                                        currentAllele.ReferencePosition = backLogPoolBAlleles.First().ReferencePosition;
                                    }
                                }
                                else if (backLogExistPoolB)
                                {
                                    currentAllele.Chromosome        = backLogPoolBAlleles.First().Chromosome;
                                    currentAllele.ReferencePosition = backLogPoolBAlleles.First().ReferencePosition;
                                }
                                else //if (backLogExistPoolA)
                                {
                                    currentAllele.Chromosome        = backLogPoolAAlleles.First().Chromosome;
                                    currentAllele.ReferencePosition = backLogPoolAAlleles.First().ReferencePosition;
                                }

                                //assemble lists of co-located variants at the position of the current variant
                                coLocatedPoolAAlleles = AssembleColocatedList(ReaderA, currentAllele, mFirst,
                                                                              ref backLogExistPoolA, ref backLogPoolAAlleles);

                                coLocatedPoolBAlleles = AssembleColocatedList(ReaderB, currentAllele, mFirst,
                                                                              ref backLogExistPoolB, ref backLogPoolBAlleles);
                            } //else, if there is nothing in either backlog, the colocated-variant list should stay empty.

                            //2) Now we have finished reading out all the co-located variants...
                            //We need to organize them into pairs, to know which allele to compare with which.
                            var             Pairs                      = SelectPairs(coLocatedPoolAAlleles, coLocatedPoolBAlleles);
                            var             ConsensusVariants          = new List <CalledAllele>();
                            AggregateAllele lastConsensusReferenceCall = null;

                            //3) For each pair, combine them and mark if biased or not.
                            for (int PairIndex = 0; PairIndex < Pairs.Count; PairIndex++)
                            {
                                var VariantA = Pairs[PairIndex][0];
                                var VariantB = Pairs[PairIndex][1];

                                var ComparisonCase = GetComparisonCase(VariantA, VariantB);


                                //add VarA and VarB to appropriate venn diagram files.
                                WriteVarsToVennFiles(ComparisonCase, VariantA, VariantB);
                                AggregateAllele Consensus = null;

                                if (doConsensus)
                                {
                                    Consensus = consensusBuilder.CombineVariants(
                                        VariantA, VariantB, ComparisonCase);


                                    //It is possible, for multiallelic sites, that a pair of variants
                                    //ends up as a consensus reference, and we may already have
                                    //called a reference for this locus.
                                    //We might have some cleaning up to do...
                                    if (Consensus.Genotype == Pisces.Domain.Types.Genotype.HomozygousRef)
                                    {
                                        //this is the first time we see a reference at this locus
                                        if (lastConsensusReferenceCall == null)
                                        {
                                            lastConsensusReferenceCall = Consensus;
                                            //it's OK to fall through and add our Consensus variant to the list.
                                        }

                                        //Else, if we have already called a reference variant
                                        // for this locus,
                                        // we want to merge the results from this reference with the old one
                                        // *before* we write it to file.
                                        else
                                        {
                                            //the chr, pos, ref, alt, and depth should be correct.
                                            //We'll merge the filters,
                                            //and take the max SB and PB. (where a higher value indicates worse value, so we stay conservative)
                                            lastConsensusReferenceCall.Filters = ConsensusBuilder.CombineFilters(lastConsensusReferenceCall, Consensus);

                                            lastConsensusReferenceCall.StrandBiasResults = new Pisces.Domain.Models.BiasResults()
                                            {
                                                GATKBiasScore = Math.Max(lastConsensusReferenceCall.StrandBiasResults.GATKBiasScore, Consensus.StrandBiasResults.GATKBiasScore)
                                            };

                                            lastConsensusReferenceCall.PoolBiasResults = new Pisces.Domain.Models.BiasResults()
                                            {
                                                GATKBiasScore = Math.Max(lastConsensusReferenceCall.PoolBiasResults.GATKBiasScore, Consensus.PoolBiasResults.GATKBiasScore)
                                            };

                                            //we are going to take the min Q and NL score, to be conservative.
                                            //Each score is merged with its own counterpart: the variant Q-score must be
                                            //compared against the new call's variant Q-score, not its genotype Q-score.
                                            lastConsensusReferenceCall.NoiseLevelApplied = Math.Min(lastConsensusReferenceCall.NoiseLevelApplied, Consensus.NoiseLevelApplied);
                                            lastConsensusReferenceCall.GenotypeQscore    = Math.Min(lastConsensusReferenceCall.GenotypeQscore, Consensus.GenotypeQscore);
                                            lastConsensusReferenceCall.VariantQscore     = Math.Min(lastConsensusReferenceCall.VariantQscore, Consensus.VariantQscore);

                                            //the merged result already lives in the list via lastConsensusReferenceCall; do not add a duplicate.
                                            continue;
                                        }
                                    }

                                    ConsensusVariants.Add(Consensus);
                                }
                            }

                            //4) Write out the results to file. (this will be a list of co-located variants)

                            if (doConsensus)
                            {
                                consensusBuilder.WriteConsensusVariantsToFile(ConsensusVariants);
                            }

                            //we assembled everyone and no one is left.
                            if ((backLogPoolAAlleles == null) &&
                                (backLogPoolBAlleles == null))
                            {
                                break;
                            }
                        }
                        catch (Exception ex)
                        {
                            //report where we were in the files, then rethrow preserving the original stack trace.
                            OnError(string.Format("Fatal error encountered comparing paired sample vcfs; Check {0}, position {1}.  Exception: {2}",
                                                  currentAllele.Chromosome, currentAllele.ReferencePosition, ex));
                            throw;
                        }
                    } //close assemble list
                }//close usings

            if (doConsensus)
            {
                consensusBuilder.CloseConsensusFile();
            }

            CloseVennDiagramStreams();
        }
        /// <summary>
        ///     Finds the longest run of a repeated unit in the reference around an indel. Candidate units are every
        ///     prefix and every suffix of <paramref name="variantBases"/> whose length is between 1 and
        ///     min(<paramref name="maxRepeatUnitLength"/>, variantBases.Length). For each unit we start at
        ///     <paramref name="variantPosition"/>, step backward one unit at a time while the unit still matches the
        ///     reference, and then count consecutive matches reading forward from that left-most match.
        /// </summary>
        /// <param name="variantPosition">Index into <paramref name="referenceBases"/> from which the backward search starts.</param>
        /// <param name="variantBases">The inserted or deleted bases the candidate units are cut from.</param>
        /// <param name="referenceBases">Reference sequence that is scanned for repeats.</param>
        /// <param name="maxRepeatUnitLength">Upper bound on the length of a candidate unit.</param>
        /// <returns>The largest number of consecutive repeats found for any candidate unit (0 if there are no candidates).</returns>
        private static int ComputeRMxNLengthForIndel(int variantPosition, string variantBases, string referenceBases, int maxRepeatUnitLength)
        {
            var totalBases    = variantBases.Length;
            var longestUnit   = Math.Min(maxRepeatUnitLength, totalBases);
            var leadingUnits  = new List<string>();
            var trailingUnits = new List<string>();

            // Longest unit first, shrinking down to a single base.
            for (var unitLength = longestUnit; unitLength > 0; unitLength--)
            {
                leadingUnits.Add(variantBases.Substring(0, unitLength));
                trailingUnits.Add(variantBases.Substring(totalBases - unitLength, unitLength));
            }

            var bestRepeatCount = 0;

            foreach (var unit in leadingUnits.Concat(trailingUnits))
            {
                // Rewind to the first consecutive copy of the unit at or before the variant position.
                var runStart = variantPosition;
                while (runStart - unit.Length >= 0 &&
                       VcfVariantUtilities.CompareSubstring(unit, referenceBases, runStart - unit.Length))
                {
                    runStart -= unit.Length;
                }

                // Walk forward from there, one unit at a time, for as long as the unit keeps matching.
                var repeatsOfUnit = 0;
                for (var cursor = runStart;
                     cursor + unit.Length <= referenceBases.Length &&
                     VcfVariantUtilities.CompareSubstring(unit, referenceBases, cursor);
                     cursor += unit.Length)
                {
                    repeatsOfUnit++;
                }

                bestRepeatCount = Math.Max(bestRepeatCount, repeatsOfUnit);
            }

            return bestRepeatCount;
        }
Beispiel #17
0
        /// <summary>
        /// Builds and returns the next batch of neighborhoods. The batch is seeded with the neighborhoods held in
        /// <c>_unfinshedNeighborhoods</c>, then variants are read from <c>_vcfVariantSource</c> until either the
        /// source is exhausted or <c>FitVariantsInNeighborhood</c> returns false.
        /// </summary>
        /// <param name="numNbhdsSoFar">Passed straight through to <c>FitVariantsInNeighborhood</c>.</param>
        /// <returns>The shared <c>_nextBatchOfNeighborhoods</c> list, after <c>PrepNbhdsForUse</c> has been applied to it.</returns>
        public IEnumerable<VcfNeighborhood> GetBatchOfNeighborhoods(int numNbhdsSoFar)
        {
            // Seed the new batch with whatever was left pending, and empty the pending list.
            _nextBatchOfNeighborhoods.Clear();
            _nextBatchOfNeighborhoods.AddRange(_unfinshedNeighborhoods);
            _unfinshedNeighborhoods.Clear();

            var batchCanGrow = true;

            // Stop as soon as the batch refuses more variants, or the source has nothing left to give.
            while (batchCanGrow && _vcfVariantSource.GetNextVariant(_tempRawVcfVariant))
            {
                var rawVariantAsList = new List<VcfVariant> { _tempRawVcfVariant };

                foreach (var allele in VcfVariantUtilities.Convert(rawVariantAsList))
                {
                    // Forced-report alleles are skipped entirely.
                    if (allele.Filters.Contains(FilterType.ForcedReport))
                    {
                        continue;
                    }

                    var site           = new VariantSite(allele);
                    var leadingRefBase = site.VcfReferenceAllele.Substring(0, 1);

                    // Extend the running reference string by one base, unless this is another allele at the same position.
                    if (site.VcfReferencePosition != _lastVariantSite.VcfReferencePosition)
                    {
                        _referenceStringBetweenVariants += leadingRefBase;
                    }

                    // Reference calls and otherwise unusable alleles only contribute their reference base.
                    if (!IsEligibleVariant(allele))
                    {
                        continue;
                    }

                    // Only a variant close enough to the previous one is offered to a neighborhood.
                    if (IsProximal(site, _lastVariantSite, _phasableVariantCriteria.PhasingDistance))
                    {
                        batchCanGrow = FitVariantsInNeighborhood(_lastVariantSite, site,
                                                                 _referenceStringBetweenVariants, numNbhdsSoFar);
                    }

                    // Proximal or not, the reference string restarts from this variant.
                    _referenceStringBetweenVariants = "";
                    _lastVariantSite                = site;
                }
            }

            PrepNbhdsForUse(_nextBatchOfNeighborhoods);

            return _nextBatchOfNeighborhoods;
        }
Beispiel #18
0
        /// <summary>
        /// Checks <c>VcfVariantUtilities.UnpackVariants</c> on two "crushed" vcf files: first pins what the crushed
        /// 1/2 records look like as read, then verifies that unpacking splits each of them into two single-alt
        /// records at the same position with recomputed AD and VF values.
        /// </summary>
        public void UnpackAlleles()
        {
            const int firstFileSitePosition  = 10;
            const int secondFileSitePosition = 223906731;

            // Two example vcf files that have been "crushed".
            var firstCrushedPath  = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
            var secondCrushedPath = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

            var crushedFirst  = VcfReader.GetAllVariantsInFile(firstCrushedPath);
            var crushedSecond = VcfReader.GetAllVariantsInFile(secondCrushedPath);

            Assert.Equal(7, crushedFirst.Count);
            Assert.Equal(90, crushedSecond.Count);

            // ---- Before unpacking: the 1/2 record and the record that follows it, in each file ----
            var crushedSiteA     = crushedFirst[5];
            var crushedSiteB     = crushedSecond[3];
            var crushedNextSiteA = crushedFirst[6];
            var crushedNextSiteB = crushedSecond[4];

            Assert.Equal(1, crushedSiteA.Genotypes.Count);
            Assert.Equal(1, crushedSiteB.Genotypes.Count);
            Assert.Equal(2, crushedSiteA.VariantAlleles.Count());
            Assert.Equal(2, crushedSiteB.VariantAlleles.Count());
            Assert.Equal("2387,2000", crushedSiteA.Genotypes[0]["AD"]);
            Assert.Equal("0.8133", crushedSiteA.Genotypes[0]["VF"]);
            Assert.Equal("254,254", crushedSiteB.Genotypes[0]["AD"]);
            Assert.Equal("AA", crushedSiteA.ReferenceAllele);
            Assert.Equal("GA", crushedSiteA.VariantAlleles[0]);
            Assert.Equal("G", crushedSiteA.VariantAlleles[1]);
            Assert.Equal(".", crushedNextSiteA.VariantAlleles[0]);
            Assert.Equal("0", crushedNextSiteA.Genotypes[0]["AD"]);
            Assert.Equal("532", crushedNextSiteB.Genotypes[0]["AD"]);
            Assert.Equal(firstFileSitePosition, crushedSiteA.ReferencePosition);
            Assert.Equal(secondFileSitePosition, crushedSiteB.ReferencePosition);
            Assert.Equal(firstFileSitePosition + 1, crushedNextSiteA.ReferencePosition);
            Assert.Equal(secondFileSitePosition + 1, crushedNextSiteB.ReferencePosition);

            // ---- After unpacking: each file gains exactly one record ----
            var unpackedFirst  = VcfVariantUtilities.UnpackVariants(crushedFirst);
            var unpackedSecond = VcfVariantUtilities.UnpackVariants(crushedSecond);

            Assert.Equal(8, unpackedFirst.Count);
            Assert.Equal(91, unpackedSecond.Count);

            var firstAltA  = unpackedFirst[5];
            var firstAltB  = unpackedSecond[3];
            var secondAltA = unpackedFirst[6];
            var secondAltB = unpackedSecond[4];

            // Expected AD values, file one:
            //   total depth = 5394, total variant count = 2387 + 2000 = 4387, so ref counts ~1007.
            // Expected AD values, file two:
            //   total depth = 532, total variant count = 254 + 254 = 508, so ref counts ~24.

            Assert.Equal(1, firstAltA.Genotypes.Count);
            Assert.Equal(1, firstAltB.Genotypes.Count);
            Assert.Equal("1007,2387", firstAltA.Genotypes[0]["AD"]);
            Assert.Equal("24,254", firstAltB.Genotypes[0]["AD"]);
            Assert.Equal("0.4425", firstAltA.Genotypes[0]["VF"]);
            Assert.Equal(1, firstAltA.VariantAlleles.Count());
            Assert.Equal(1, firstAltB.VariantAlleles.Count());
            Assert.Equal(1, secondAltA.VariantAlleles.Count());
            Assert.Equal(1, secondAltB.VariantAlleles.Count());
            Assert.Equal("1007,2000", secondAltA.Genotypes[0]["AD"]);
            Assert.Equal("24,254", secondAltB.Genotypes[0]["AD"]);
            Assert.Equal("AA", firstAltA.ReferenceAllele);
            Assert.Equal("GA", firstAltA.VariantAlleles[0]);
            Assert.Equal("G", secondAltA.VariantAlleles[0]);
            Assert.Equal("0.3708", secondAltA.Genotypes[0]["VF"]);

            // Both unpacked halves stay at the position of the original crushed record.
            Assert.Equal(firstFileSitePosition, firstAltA.ReferencePosition);
            Assert.Equal(secondFileSitePosition, firstAltB.ReferencePosition);
            Assert.Equal(firstFileSitePosition, secondAltA.ReferencePosition);
            Assert.Equal(secondFileSitePosition, secondAltB.ReferencePosition);
        }
Beispiel #19
0
        public void Convert()
        {
            var vcfVar = TestHelper.CreateDummyVariant("chr10", 123, "A", "C", 1000, 156);

            vcfVar.Genotypes[0]["GT"] = "0/1";
            var allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar);

            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(vcfVar.VariantAlleles[0], allele.AlternateAllele);
            Assert.Equal(vcfVar.ReferenceAllele, allele.ReferenceAllele);
            Assert.Equal(vcfVar.ReferencePosition, allele.ReferencePosition);
            Assert.Equal(new List <FilterType>()
            {
            }, allele.Filters);
            Assert.Equal(Genotype.HeterozygousAltRef, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);
            Assert.Equal(0.0100f, allele.FractionNoCalls);

            // no US filed
            Assert.Null(Record.Exception(() => VcfVariantUtilities.ConvertUnpackedVariant(vcfVar)));

            // exception case if US field doesn't match configuration
            Assert.Throws <ArgumentOutOfRangeException>(() => VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, true));

            // exception case if US field doesn't match configuration
            Assert.Throws <ArgumentOutOfRangeException>(() => VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, false));

            // exception case
            vcfVar.Genotypes[0]["US"] = "1,2,3,4,5,6,7,8";
            Assert.Throws <ArgumentOutOfRangeException>(() => VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, true));


            vcfVar.Genotypes[0]["US"] = "0,0,1,0,0,0,8,1,23,5,20,10";
            Assert.False(VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false).ReadCollapsedCountTotal.Any(x => x != 0));

            // exception case
            vcfVar.Genotypes[0]["US"] = "0,0,1,0,0,0,8,1,23,5,20,10";
            Assert.Throws <ArgumentOutOfRangeException>(() => VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, false));

            // test for collapsed reads against specs
            // When ReportRcCounts and ReportTsCounts are set as True,
            // a US field will be outputted for each genotype as mutant support: 
            // duplex -stitched, duplex-nonstitched, simplex-forward-stitched, simplex-forward-nonstitched, simplex-reverse-stitched, simplex-reverse-nonstitched followed by total support: duplex-stitched, duplex-nonstitched, simplex-forward-stitched, simplex-forward-nonstitched, simplex-reverse-stitched, simplex-reverse-nonstitched.
            vcfVar.Genotypes[0]["US"] = "0,0,1,0,0,0,8,1,23,5,20,10";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, true);
            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(vcfVar.VariantAlleles[0], allele.AlternateAllele);
            Assert.Equal(vcfVar.ReferenceAllele, allele.ReferenceAllele);
            Assert.Equal(vcfVar.ReferencePosition, allele.ReferencePosition);
            Assert.Equal(new List <FilterType>(), allele.Filters);
            Assert.Equal(Genotype.HeterozygousAltRef, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);
            Assert.Equal(0.0100f, allele.FractionNoCalls);

            Assert.Equal(8, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.DuplexStitched]);
            Assert.Equal(1, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.DuplexNonStitched]);
            Assert.Equal(23, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexForwardStitched]);
            Assert.Equal(5, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexForwardNonStitched]);
            Assert.Equal(20, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexReverseStitched]);
            Assert.Equal(10, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexReverseNonStitched]);
            Assert.Equal(0, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.DuplexStitched]);
            Assert.Equal(0, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.DuplexNonStitched]);
            Assert.Equal(1, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexForwardStitched]);
            Assert.Equal(0, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexForwardNonStitched]);
            Assert.Equal(0, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexReverseStitched]);
            Assert.Equal(0, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexReverseNonStitched]);

            // When contain XV and XW tags, output read counts for duplex-stitched, duplex-nonstitched, simplex-stitched, and simplex-nonstitched.
            vcfVar.Genotypes[0]["US"] = "1,2,3,4,5,6,7,8";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, true, false);
            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(vcfVar.VariantAlleles[0], allele.AlternateAllele);
            Assert.Equal(vcfVar.ReferenceAllele, allele.ReferenceAllele);
            Assert.Equal(vcfVar.ReferencePosition, allele.ReferencePosition);
            Assert.Equal(new List <FilterType>(), allele.Filters);
            Assert.Equal(Genotype.HeterozygousAltRef, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);
            Assert.Equal(0.0100f, allele.FractionNoCalls);
            Assert.Equal(1, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.DuplexStitched]);
            Assert.Equal(2, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.DuplexNonStitched]);
            Assert.Equal(3, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexStitched]);
            Assert.Equal(4, allele.ReadCollapsedCountsMut[(int)ReadCollapsedType.SimplexNonStitched]);
            Assert.Equal(5, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.DuplexStitched]);
            Assert.Equal(6, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.DuplexNonStitched]);
            Assert.Equal(7, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexStitched]);
            Assert.Equal(8, allele.ReadCollapsedCountTotal[(int)ReadCollapsedType.SimplexNonStitched]);


            vcfVar.Genotypes[0]["GT"] = "./.";
            vcfVar.Filters            = "R5x9";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar);

            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(vcfVar.VariantAlleles[0], allele.AlternateAllele);
            Assert.Equal(vcfVar.ReferenceAllele, allele.ReferenceAllele);
            Assert.Equal(vcfVar.ReferencePosition, allele.ReferencePosition);
            Assert.Equal(new List <FilterType>()
            {
                FilterType.RMxN
            }, allele.Filters);
            Assert.Equal(Genotype.AltLikeNoCall, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);

            vcfVar.Genotypes[0]["GT"] = "1/2";
            vcfVar.Filters            = "R5x9;SB";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar);

            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(vcfVar.VariantAlleles[0], allele.AlternateAllele);
            Assert.Equal(vcfVar.ReferenceAllele, allele.ReferenceAllele);
            Assert.Equal(vcfVar.ReferencePosition, allele.ReferencePosition);
            Assert.Equal(new List <FilterType>()
            {
                FilterType.RMxN, FilterType.StrandBias
            }, allele.Filters);
            Assert.Equal(Genotype.HeterozygousAlt1Alt2, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);

            vcfVar.Genotypes[0]["GT"] = "1/1";
            vcfVar.Filters            = "R8;q30";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar);

            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(new List <FilterType>()
            {
                FilterType.IndelRepeatLength, FilterType.LowVariantQscore
            }, allele.Filters);
            Assert.Equal(Genotype.HomozygousAlt, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);

            vcfVar.Genotypes[0]["GT"] = "1/1";
            vcfVar.Filters            = "lowvariantfreq;multiallelicsite";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar);

            Assert.Equal(vcfVar.ReferenceName, allele.Chromosome);
            Assert.Equal(new List <FilterType>()
            {
                FilterType.LowVariantFrequency, FilterType.MultiAllelicSite
            }, allele.Filters);
            Assert.Equal(Genotype.HomozygousAlt, allele.Genotype);
            Assert.Equal(AlleleCategory.Snv, allele.Type);

            // check handling of complex allele types (DO trimming, please)
            var originalPos = 10;

            vcfVar.VariantAlleles[0] = "CCGCA";
            vcfVar.ReferenceAllele   = "CC";
            vcfVar.ReferencePosition = originalPos;
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false);
            Assert.Equal(originalPos + 1, allele.ReferencePosition);
            Assert.Equal("CGCA", allele.AlternateAllele);
            Assert.Equal("C", allele.ReferenceAllele);

            // check handling of complex allele types (DO trimming, please)
            vcfVar.VariantAlleles[0] = "CGCAAA";
            vcfVar.ReferenceAllele   = "CA";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false);
            Assert.Equal(originalPos, allele.ReferencePosition);
            Assert.Equal("CGCAA", allele.AlternateAllele);
            Assert.Equal("C", allele.ReferenceAllele);

            // check handling of complex allele types (DO trimming, please)
            vcfVar.VariantAlleles[0] = "CCCGCAAA";
            vcfVar.ReferenceAllele   = "CCCA";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false);
            Assert.Equal(originalPos + 2, allele.ReferencePosition);
            Assert.Equal("CGCAA", allele.AlternateAllele);
            Assert.Equal("C", allele.ReferenceAllele);


            // check handling of complex allele types (DO NOT trim)
            vcfVar.VariantAlleles[0] = "CCGCA";
            vcfVar.ReferenceAllele   = "CC";
            vcfVar.ReferencePosition = originalPos;
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false, false);
            Assert.Equal(originalPos, allele.ReferencePosition);
            Assert.Equal("CCGCA", allele.AlternateAllele);
            Assert.Equal("CC", allele.ReferenceAllele);

            // check handling of complex allele types (DO NOT trim)
            vcfVar.VariantAlleles[0] = "CGCAAA";
            vcfVar.ReferenceAllele   = "CA";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false, false);
            Assert.Equal(originalPos, allele.ReferencePosition);
            Assert.Equal("CGCAAA", allele.AlternateAllele);
            Assert.Equal("CA", allele.ReferenceAllele);

            // check handling of complex allele types (DO NOT trim)
            vcfVar.VariantAlleles[0] = "CCCGCAAA";
            vcfVar.ReferenceAllele   = "CAA";
            allele = VcfVariantUtilities.ConvertUnpackedVariant(vcfVar, false, false, false);
            Assert.Equal(originalPos, allele.ReferencePosition);
            Assert.Equal("CCCGCAAA", allele.AlternateAllele);
            Assert.Equal("CAA", allele.ReferenceAllele);
        }
        /// <summary>
        /// Checks how <c>VcfVariantUtilities.MapFilterString</c> translates filter text into
        /// <c>FilterType</c> values: pass-like strings, every recognised filter spelling,
        /// near-miss spellings that must come back as <c>FilterType.Unknown</c>,
        /// ';'-separated combinations, and oddly delimited input.
        /// </summary>
        public void MapFilterStringTests()
        {
            // ---- Happy path ----

            // Pass-like or blank text is expected to yield an empty filter list.
            foreach (var passLike in new[] { "PASS", "pass", ".", "", " " })
            {
                Assert.Equal(0, VcfVariantUtilities.MapFilterString(passLike).Count);
            }

            // Each recognised spelling must map to its filter type (the checks mix upper and lower case).
            foreach (var lowQ in new[] { "lowq", "q20", "q30", "LowQ", "LowQ500", "LowQual" })
            {
                Assert.Equal(FilterType.LowVariantQscore, VcfVariantUtilities.MapFilterString(lowQ)[0]);
            }

            Assert.Equal(FilterType.PoolBias, VcfVariantUtilities.MapFilterString("pb")[0]);
            Assert.Equal(FilterType.StrandBias, VcfVariantUtilities.MapFilterString("sb")[0]);
            Assert.Equal(FilterType.AmpliconBias, VcfVariantUtilities.MapFilterString("ab")[0]);

            foreach (var lowDepth in new[] { "LOWDP", "lowdp", "lowdepth" })
            {
                Assert.Equal(FilterType.LowDepth, VcfVariantUtilities.MapFilterString(lowDepth)[0]);
            }

            foreach (var lowFrequency in new[] { "lowfreq", "lowvariantfreq" })
            {
                Assert.Equal(FilterType.LowVariantFrequency, VcfVariantUtilities.MapFilterString(lowFrequency)[0]);
            }

            foreach (var lowGenotypeQuality in new[] { "lowgq", "gq" })
            {
                Assert.Equal(FilterType.LowGenotypeQuality, VcfVariantUtilities.MapFilterString(lowGenotypeQuality)[0]);
            }

            foreach (var repeatLength in new[] { "r8", "R42" })
            {
                Assert.Equal(FilterType.IndelRepeatLength, VcfVariantUtilities.MapFilterString(repeatLength)[0]);
            }

            foreach (var repeatMxN in new[] { "R5x9", "R3x2" })
            {
                Assert.Equal(FilterType.RMxN, VcfVariantUtilities.MapFilterString(repeatMxN)[0]);
            }

            Assert.Equal(FilterType.MultiAllelicSite, VcfVariantUtilities.MapFilterString("multiallelicsite")[0]);
            Assert.Equal(FilterType.ForcedReport, VcfVariantUtilities.MapFilterString("forcedreport")[0]);
            Assert.Equal(FilterType.NoCall, VcfVariantUtilities.MapFilterString("nc")[0]);

            // Text that matches no known filter is reported as Unknown rather than dropped.
            Assert.Equal(FilterType.Unknown, VcfVariantUtilities.MapFilterString("MyCatIsCool")[0]);

            // ---- Pathological input ----

            // Near misses: known filter names that are truncated, prefixed or suffixed must not match.
            var nearMisses = new[]
            {
                "PAS", "passFoo", "!", "42",
                "q", "bq20", "sq30",
                "pb3", "4sb", "ab2", "LOWDP500",
                "7r8", "r",
                "R5Y9", "R3Z2",
                "multiallelicsite43",
                "4forcedreport",
                "ncc"
            };

            foreach (var nearMiss in nearMisses)
            {
                Assert.Equal(FilterType.Unknown, VcfVariantUtilities.MapFilterString(nearMiss)[0]);
            }

            // ---- Combinations ----

            // Several ';'-separated entries with stray whitespace; results are checked position by position.
            var combinedText = "lowdepth;lowvariantfreq;gq;r5x9  \t ; blah ; multiallelicsite;foo ";
            var combinedFilters = VcfVariantUtilities.MapFilterString(combinedText);
            var expectedCombined = new[]
            {
                FilterType.LowDepth,
                FilterType.LowVariantFrequency,
                FilterType.LowGenotypeQuality,
                FilterType.RMxN,
                FilterType.Unknown,
                FilterType.MultiAllelicSite,
                FilterType.Unknown
            };

            for (var index = 0; index < expectedCombined.Length; index++)
            {
                Assert.Equal(expectedCombined[index], combinedFilters[index]);
            }

            // ---- Really strange stuff ----

            // Empty segments, and a PASS surrounded by empty segments, leave nothing behind.
            foreach (var onlySeparators in new[] { "; ; ;", "; ; PASS;" })
            {
                Assert.Equal(0, VcfVariantUtilities.MapFilterString(onlySeparators).Count);
            }

            // Note: entries are split on ';' only, not on ',', so comma-separated text is one unknown filter.
            var commaSeparatedInputs = new[]
            {
                ", ,... , ",
                ", , , ",
                "lowdepth, multiallelicsite, lowvariantfreq , gq, r5x9"
            };

            foreach (var commaSeparated in commaSeparatedInputs)
            {
                Assert.Equal(FilterType.Unknown, VcfVariantUtilities.MapFilterString(commaSeparated)[0]);
            }

            var commaDelimited = "(*%.,PASS,q30";
            Assert.Equal(FilterType.Unknown, VcfVariantUtilities.MapFilterString(commaDelimited)[0]);
            Assert.Equal(1, VcfVariantUtilities.MapFilterString(commaDelimited).Count);

            // With ';' separators the PASS entry is dropped, leaving the junk prefix and q30.
            var semicolonDelimited = "(*%.,;PASS;q30";
            Assert.Equal(2, VcfVariantUtilities.MapFilterString(semicolonDelimited).Count);
            Assert.Equal(FilterType.Unknown, VcfVariantUtilities.MapFilterString(semicolonDelimited)[0]);
            Assert.Equal(FilterType.LowVariantQscore, VcfVariantUtilities.MapFilterString(semicolonDelimited)[1]);
        }
Beispiel #21
0
        /// <summary>
        /// Exercises <c>VcfVariantUtilities.TrimUnsupportedAlleleType</c> on a series of
        /// reference/alternate pairs, checking the trimmed alleles and the resulting position.
        /// </summary>
        public void TestTrimUnsupportedAlleleType()
        {
            const int start = 123;

            // Trimming is expected to happen from the back first, so the position is conserved if possible.
            AssertTrimUnsupportedAlleleResult("TAATA", "TAATAAATAAATA", "T", "TAATAAATA", start);

            // Identical multi-base alleles are expected to collapse to the single leading base.
            AssertTrimUnsupportedAlleleResult("TAATA", "TAATA", "T", "T", start);

            AssertTrimUnsupportedAlleleResult("TAATAAATAAATA", "TAATA", "TAATAAATA", "T", start);

            // 123, "TACTAAATAAATA", "TAATA"
            //   -> 123, "TACTAAATA", "T"   (trim the back)
            //   -> 123, "TACTAAATA", "T"   (trim the front: nothing to remove)
            AssertTrimUnsupportedAlleleResult("TACTAAATAAATA", "TAATA", "TACTAAATA", "T", start);

            AssertTrimUnsupportedAlleleResult("TAATA", "TACTAAATAAATA", "T", "TACTAAATA", start);

            // 123, "TAATA", "TAATAAATAAATC"
            //   -> 123,     "TAATA",   "TAATAAATAAATC"     (trim the back: nothing to remove)
            //   -> 123 + 4, "(TAAT)A", "(TAAT)AAATAAATC"   (trim the front: 4 bases)
            AssertTrimUnsupportedAlleleResult("TAATA", "TAATAAATAAATC", "A", "AAATAAATC", start + 4);

            // Negative case: an insertion should be left alone.
            AssertTrimUnsupportedAlleleResult("T", "TAATAAATAAATC", "T", "TAATAAATAAATC", start);

            // Negative case: a deletion should be left alone.
            AssertTrimUnsupportedAlleleResult("TAATAAATAAATC", "T", "TAATAAATAAATC", "T", start);

            // Regression pair: this input slipped past the other cases and was found as a bug
            // during 5.2.6 RC testing. Both orientations are expected to come back unchanged.
            AssertTrimUnsupportedAlleleResult(
                "CTGCCATACAGCTTCAACAACAACTT", "ATGCCATACAGCTTCAACAACAA",
                "CTGCCATACAGCTTCAACAACAACTT", "ATGCCATACAGCTTCAACAACAA", start);
            AssertTrimUnsupportedAlleleResult(
                "ATGCCATACAGCTTCAACAACAA", "CTGCCATACAGCTTCAACAACAACTT",
                "ATGCCATACAGCTTCAACAACAA", "CTGCCATACAGCTTCAACAACAACTT", start);

            // Single-base alleles, differing or identical, are expected to be untouched.
            AssertTrimUnsupportedAlleleResult("A", "C", "A", "C", start);
            AssertTrimUnsupportedAlleleResult("A", "A", "A", "A", start);
        }

        /// <summary>
        /// Builds a dummy allele on chr10 at position 123 with the given alleles, runs
        /// <c>VcfVariantUtilities.TrimUnsupportedAlleleType</c> on it, and asserts the outcome.
        /// </summary>
        /// <param name="referenceAllele">Reference allele handed to the dummy-allele factory.</param>
        /// <param name="alternateAllele">Alternate allele handed to the dummy-allele factory.</param>
        /// <param name="expectedReference">Reference allele expected after trimming.</param>
        /// <param name="expectedAlternate">Alternate allele expected after trimming.</param>
        /// <param name="expectedPosition">Reference position expected after trimming.</param>
        private static void AssertTrimUnsupportedAlleleResult(
            string referenceAllele,
            string alternateAllele,
            string expectedReference,
            string expectedAlternate,
            int expectedPosition)
        {
            var candidate = TestHelper.CreateDummyAllele("chr10", 123, referenceAllele, alternateAllele, 1000, 156);

            VcfVariantUtilities.TrimUnsupportedAlleleType(candidate);

            Assert.Equal(expectedReference, candidate.ReferenceAllele);
            Assert.Equal(expectedAlternate, candidate.AlternateAllele);
            Assert.Equal(expectedPosition, candidate.ReferencePosition);
        }