/// <summary>
/// Builds the output alignment for a stitched read. A new <c>BamAlignment</c> is constructed
/// from <c>read.BamAlignment</c>, its first-mate and proper-pair flags are cleared, the stitched
/// CIGAR is applied when one is present, and the XD / XU / XV / XW / XR tags are added.
/// </summary>
/// <param name="pair">Pair whose Read1 supplies the XU, XV and XW tag values, when it has them.</param>
/// <param name="read">Read providing the source alignment, the stitched CIGAR and the CIGAR directions.</param>
/// <param name="read1dir">First character of the XR tag value.</param>
/// <param name="read2dir">Second character of the XR tag value.</param>
/// <returns>The newly constructed alignment carrying the tags described above.</returns>
public static BamAlignment StitchifyBamAlignment(ReadPair pair, Read read, char read1dir, char read2dir)
{
    var stitched = new BamAlignment(read.BamAlignment);
    stitched.SetIsFirstMate(false);
    stitched.SetIsProperPair(false);

    var newTags = new TagUtils();

    if (read.StitchedCigar != null)
    {
        stitched.CigarData = read.StitchedCigar;
    }

    if (read.CigarDirections != null)
    {
        newTags.AddStringTag("XD", read.CigarDirections.ToString());
    }

    // if the original reads had UMIs and were collapsed, they will have XU(Z), XV(i), XW(i)
    // these need to be copied to correctly populate some fields in the called variants
    var umiSource = pair.Read1;
    if (!(umiSource.TagData == null || umiSource.TagData.Length <= 0))
    {
        var umi = umiSource.GetStringTag("XU");
        if (umi != null)
        {
            newTags.AddStringTag("XU", umi);
        }

        var xvValue = umiSource.GetIntTag("XV");
        if (xvValue.HasValue)
        {
            newTags.AddIntTag("XV", xvValue.Value);
        }

        var xwValue = umiSource.GetIntTag("XW");
        if (xwValue.HasValue)
        {
            newTags.AddIntTag("XW", xwValue.Value);
        }
    }

    // XR is the two direction characters, read 1 first.
    var directions = new string(new[] { read1dir, read2dir });
    newTags.AddStringTag("XR", directions);

    var newTagBytes = newTags.ToBytes();
    if (stitched.TagData != null)
    {
        stitched.AppendTagData(newTagBytes);
    }
    else
    {
        stitched.TagData = newTagBytes;
    }

    return stitched;
}
/// <summary>
/// Serializes optional XD and XR string tags into a tag byte array.
/// </summary>
/// <param name="xd">Value for the XD tag; the tag is omitted when this is null or empty.</param>
/// <param name="xr">Value for the XR tag; the tag is omitted when this is null or empty.</param>
/// <returns>The bytes produced by <c>TagUtils.ToBytes()</c> for the tags that were added (XD first, then XR).</returns>
public static byte[] GetXDXRTagData(string xd, string xr)
{
    var builder = new TagUtils();

    bool hasXd = !string.IsNullOrEmpty(xd);
    if (hasXd)
    {
        builder.AddStringTag("XD", xd);
    }

    bool hasXr = !string.IsNullOrEmpty(xr);
    if (hasXr)
    {
        builder.AddStringTag("XR", xr);
    }

    byte[] tagBytes = builder.ToBytes();
    return tagBytes;
}
/// <summary>
/// Test helper: builds a read on "chr7" from the given XD tag, CIGAR string and sequence, then
/// asserts that both <c>Read.SequencedBaseDirectionMap</c> and
/// <c>Read.CreateSequencedBaseDirectionMap</c> yield <paramref name="expectedDirectionMap"/>.
/// </summary>
/// <param name="inputXDtag">Value stored in the alignment's XD tag.</param>
/// <param name="inputCigarString">CIGAR string used to construct the alignment's CigarData.</param>
/// <param name="sequence">Bases assigned to the alignment.</param>
/// <param name="expectedDirectionMap">Direction map both code paths are expected to produce.</param>
private static void CheckSequencedBaseDirectionMap(string inputXDtag, string inputCigarString, string sequence, DirectionType[] expectedDirectionMap)
{
    var xdTag = new TagUtils();
    xdTag.AddStringTag("XD", inputXDtag);

    var bam = new BamAlignment();
    bam.Bases = sequence;
    bam.Position = 100;
    bam.MatePosition = 500;
    bam.Name = "test";
    bam.CigarData = new CigarAlignment(inputCigarString);
    bam.MapQuality = 10;
    bam.TagData = xdTag.ToBytes();
    bam.Qualities = new byte[] { 10, 20, 30 };

    var read = new Read("chr7", bam);

    // Map as exposed by the read itself.
    var mapFromProperty = read.SequencedBaseDirectionMap;
    Assert.Equal(expectedDirectionMap, mapFromProperty);

    // Map computed directly from the read's directions and CIGAR.
    var mapFromStaticCall = Read.CreateSequencedBaseDirectionMap(read.CigarDirections, read.CigarData);
    Assert.Equal(expectedDirectionMap, mapFromStaticCall);
}
/// <summary>
/// Serializes a single XC string tag into a tag byte array.
/// </summary>
/// <param name="value">Value stored under the XC tag; passed to <c>AddStringTag</c> unchecked.</param>
/// <returns>The bytes produced by <c>TagUtils.ToBytes()</c> for the XC tag.</returns>
public static byte[] GetXCTagData(string value)
{
    var builder = new TagUtils();
    builder.AddStringTag("XC", value);

    byte[] tagBytes = builder.ToBytes();
    return tagBytes;
}
/// <summary>
/// Test helper: builds a read on "chr7" from the given XD tag, CIGAR string and sequence, maps
/// <paramref name="readIndexes"/> through <c>Read.SequencedIndexesToExpandedIndexes</c>, and asserts
/// each result against the entry at the same position in <paramref name="expectedExpandedIndexes"/>.
/// </summary>
/// <param name="inputXDtag">Value stored in the alignment's XD tag.</param>
/// <param name="inputCigarString">CIGAR string used to construct the alignment's CigarData.</param>
/// <param name="sequence">Bases assigned to the alignment.</param>
/// <param name="readIndexes">Indexes to convert; only this array's length drives the comparison loop.</param>
/// <param name="expectedExpandedIndexes">Expected converted value for each entry of <paramref name="readIndexes"/>.</param>
private static void CheckReadIndexToExpandedIndex(string inputXDtag, string inputCigarString, string sequence, int[] readIndexes, int[] expectedExpandedIndexes)
{
    var xdTag = new TagUtils();
    xdTag.AddStringTag("XD", inputXDtag);

    var bam = new BamAlignment();
    bam.Bases = sequence;
    bam.Position = 100;
    bam.MatePosition = 500;
    bam.Name = "test";
    bam.CigarData = new CigarAlignment(inputCigarString);
    bam.MapQuality = 10;
    bam.TagData = xdTag.ToBytes();
    bam.Qualities = new byte[] { 10, 20, 30 };

    var read = new Read("chr7", bam);

    // NOTE(review): the value is unused, but the property is still read before the index
    // conversion exactly as before — confirm whether the getter has a side effect that
    // SequencedIndexesToExpandedIndexes relies on before removing this line.
    var directionMap = read.SequencedBaseDirectionMap;

    var actualExpanded = read.SequencedIndexesToExpandedIndexes(readIndexes);

    int position = 0;
    while (position < readIndexes.Length)
    {
        Assert.Equal(expectedExpandedIndexes[position], actualExpanded[position]);
        position++;
    }
}
/// <summary>
/// Test helper that builds a proper-pair <c>BamAlignment</c> with RefID 1, all-'A' bases and
/// zero-valued qualities sized to the CIGAR's read span.
/// </summary>
/// <param name="name">Read name.</param>
/// <param name="position">Alignment position.</param>
/// <param name="isMapped">When false, the alignment is flagged as unmapped.</param>
/// <param name="isSecondary">Value passed to <c>SetIsSecondaryAlignment</c>.</param>
/// <param name="cigar">CIGAR string; its read span determines the length of the bases and qualities.</param>
/// <param name="reverse">When true, the alignment is flagged as reverse strand.</param>
/// <param name="isSupplementary">When true, AlignmentFlag is set to 2048 before the other flags are applied.</param>
/// <param name="supplementary">When not null, stored as the value of an SA string tag.</param>
/// <returns>The configured alignment.</returns>
private BamAlignment CreateAlignment(string name = "", int position = 1, bool isMapped = true, bool isSecondary = false, string cigar = "4M", bool reverse = false, bool isSupplementary = false, string supplementary = null)
{
    var result = new BamAlignment
    {
        Name = name,
        Position = position,
        RefID = 1
    };

    if (isSupplementary)
    {
        result.AlignmentFlag = 2048;
    }

    bool unmapped = !isMapped;
    result.SetIsUnmapped(unmapped);
    result.SetIsSecondaryAlignment(isSecondary);

    if (supplementary != null)
    {
        var saTag = new TagUtils();
        saTag.AddStringTag("SA", supplementary);
        result.TagData = saTag.ToBytes();
    }

    if (reverse)
    {
        result.SetIsReverseStrand(true);
    }

    result.CigarData = new CigarAlignment(cigar);

    // Bases are a run of 'A' as long as the CIGAR's read span; qualities match the bases' length.
    int readSpan = (int)result.CigarData.GetReadSpan();
    result.Bases = new string('A', readSpan);
    result.Qualities = new byte[result.Bases.Length];

    result.SetIsProperPair(true);

    return result;
}
/// <summary>
/// Test helper that creates a fresh <c>Read</c> mirroring the name, position, CIGAR, bases,
/// duplicate / secondary / supplementary flags of <paramref name="read"/>. When the source read
/// has a supplementary alignment, a placeholder SA tag ("dummy") is appended to the copy.
/// </summary>
/// <param name="read">Read to mirror.</param>
/// <returns>The newly built read.</returns>
private Read CopyRead(Read read)
{
    var source = read.BamAlignment;
    var rebuiltAlignment = CreateAlignment(
        source.Name,
        source.Position,
        source.CigarData.ToString(),
        source.Bases);

    var copy = new Read(read.Chromosome, rebuiltAlignment);

    // Flags are applied through the copy's own BamAlignment.
    copy.BamAlignment.SetIsDuplicate(read.IsPcrDuplicate);

    bool isSecondary = !read.IsPrimaryAlignment;
    copy.BamAlignment.SetIsSecondaryAlignment(isSecondary);

    copy.BamAlignment.SetIsSupplementaryAlignment(read.IsSupplementaryAlignment);

    if (!read.HasSupplementaryAlignment)
    {
        return copy;
    }

    var saTag = new TagUtils();
    saTag.AddStringTag("SA", "dummy");
    copy.BamAlignment.AppendTagData(saTag.ToBytes());

    return copy;
}
/// <summary>
/// Builds a synthetic <c>BamAlignment</c> from an abstract alignment description. The bases are
/// all 'A', except that when the MNV length is positive a run of 'G' of that length is placed
/// starting at the 1-based MNV position. Every base gets the same quality.
/// </summary>
/// <param name="alignment">Supplies the CIGAR string, the XD directions string and the position (stored minus one).</param>
/// <param name="qualityForAll">Quality value repeated for every base of the read.</param>
/// <param name="MNVdata">Item1 is the MNV position within the read; Item2 is the MNV length (no MNV when not positive).</param>
/// <returns>The constructed alignment, or null when any exception is raised while building it.</returns>
private static BamAlignment BuildRead(AbstractAlignment alignment, byte qualityForAll, Tuple<int, int> MNVdata)
{
    int mnvStart = MNVdata.Item1;
    int mnvSize = MNVdata.Item2;

    try
    {
        var cigar = new CigarAlignment(alignment.Cigar);
        int readLength = (int)cigar.GetReadSpan();

        string bases;
        if (mnvSize > 0)
        {
            // 'A' up to the MNV, 'G' for the MNV itself, 'A' for whatever remains of the read.
            string leading = new string('A', mnvStart - 1);
            string mnvBases = new string('G', mnvSize);
            string trailing = new string('A', readLength - leading.Length - mnvBases.Length);
            bases = leading + mnvBases + trailing;
        }
        else
        {
            bases = new string('A', readLength);
        }

        var directionTag = new TagUtils();
        directionTag.AddStringTag("XD", alignment.Directions);

        var built = new BamAlignment();
        built.RefID = 1;
        built.Position = alignment.Position - 1;
        built.CigarData = cigar;
        built.Bases = bases;
        built.TagData = directionTag.ToBytes();
        built.Qualities = Enumerable.Repeat(qualityForAll, readLength).ToArray();
        built.MapQuality = 50;

        return built;
    }
    catch
    {
        // Any failure while constructing the read is reported to the caller as null.
        return null;
    }
}
public void GetTag_Tests()
{
    // Verifies BamAlignment.GetStringTag against tag data holding an int tag (NM),
    // a string tag (XU) and a char tag (XP).

    // create a tag
    TagUtils tagUtils = new TagUtils();
    tagUtils.AddIntTag("NM", 5);
    tagUtils.AddStringTag("XU", "ABCD");
    tagUtils.AddCharTag("XP", '?');
    byte[] tagData = tagUtils.ToBytes();

    var alignment = new BamAlignment() { TagData = tagData };

    // string tag scenarios
    // A string tag is returned as-is.
    Assert.Equal("ABCD", alignment.GetStringTag("XU"));
    // A char tag is returned as a one-character string.
    Assert.Equal("?", alignment.GetStringTag("XP"));
    // Asking for an int tag as a string throws.
    Assert.Throws<ApplicationException>(() => alignment.GetStringTag("NM"));
    // A tag that is not present yields null (Assert.Null is the dedicated null check).
    Assert.Null(alignment.GetStringTag("AB"));
}
/// <summary>
/// Test helper: stitches two identical "2S2M" alignments through <c>PairHandler.ExtractReads</c>
/// and asserts that a single alignment comes back carrying an XD tag, without the NM / BC / SM
/// tags of the inputs, and (when requested) with the XU / XV / XW tags preserved.
/// </summary>
/// <param name="addUmiTags">When true, XV, XW and XU tags are added to both inputs and expected on the output.</param>
private void ExtractReads(bool addUmiTags)
{
    var refIdMapping = new Dictionary<int, string>();
    refIdMapping.Add(1, "chr1");

    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var statusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, statusCounter, filterUnstitchablePairs: true);

    // First mate: BC and SM tags, plus the UMI tags on request.
    var firstMate = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };

    var firstMateTags = new TagUtils();
    firstMateTags.AddStringTag("BC", "14");
    firstMateTags.AddIntTag("SM", 40);
    if (addUmiTags)
    {
        firstMateTags.AddIntTag("XV", 1);
        firstMateTags.AddIntTag("XW", 2);
        firstMateTags.AddStringTag("XU", "ABBA-ZABBA");
    }
    firstMate.AppendTagData(firstMateTags.ToBytes());

    // Second mate: NM, BC and SM tags, plus the UMI tags on request.
    var secondMate = new BamAlignment()
    {
        AlignmentFlag = 0,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };

    var secondMateTags = new TagUtils();
    secondMateTags.AddIntTag("NM", 5);
    secondMateTags.AddStringTag("BC", "14");
    secondMateTags.AddIntTag("SM", 20);
    if (addUmiTags)
    {
        secondMateTags.AddIntTag("XV", 1);
        secondMateTags.AddIntTag("XW", 2);
        secondMateTags.AddStringTag("XU", "ABBA-ZABBA");
    }
    secondMate.AppendTagData(secondMateTags.ToBytes());

    var readPair = new ReadPair(firstMate);
    readPair.AddAlignment(secondMate);

    var extracted = pairHandler.ExtractReads(readPair);
    Assert.Equal(1, extracted.Count);

    var stitchedAlignment = extracted[0];

    // The stitched read carries XD but none of the input-only tags.
    Assert.NotNull(stitchedAlignment.GetStringTag("XD"));
    Assert.Null(stitchedAlignment.GetIntTag("NM"));
    Assert.Null(stitchedAlignment.GetStringTag("BC"));
    Assert.Null(stitchedAlignment.GetIntTag("SM"));

    if (!addUmiTags)
    {
        return;
    }

    Assert.Equal("ABBA-ZABBA", stitchedAlignment.GetStringTag("XU"));
    Assert.Equal(1, stitchedAlignment.GetIntTag("XV"));
    Assert.Equal(2, stitchedAlignment.GetIntTag("XW"));
}
/// <summary>
/// Test helper: gives both mates of a pair (flags 99 and 144) a bogus XR tag, runs them through
/// <c>PairHandler.ExtractReads</c>, and asserts that the single resulting alignment reports
/// XR = "FR" rather than the bogus value.
/// </summary>
private void ExtractReadsStrandXRValidation()
{
    var refIdMapping = new Dictionary<int, string>() { { 1, "chr1" } };
    var stitcher = StitcherTestHelpers.GetStitcher(10, false);
    var readStatusCounter = new ReadStatusCounter();
    var pairHandler = new PairHandler(refIdMapping, stitcher, readStatusCounter, filterUnstitchablePairs: true);

    var alignment1 = new BamAlignment()
    {
        AlignmentFlag = 99,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    var tagUtils = new TagUtils();
    tagUtils.AddStringTag("XR", "BLABLA"); //start with random XR tag and confirm whether it is overwritten by read direction
    alignment1.AppendTagData(tagUtils.ToBytes());

    var alignment2 = new BamAlignment()
    {
        AlignmentFlag = 144,
        Bases = "ABCF",
        Bin = 4,
        CigarData = new CigarAlignment("2S2M"),
        FragmentLength = 42,
        MapQuality = 30,
        MatePosition = 2,
        MateRefID = 43,
        Name = "Read1",
        Position = 1,
        Qualities = new byte[4],
        RefID = 1,
        TagData = new byte[0]
    };
    // Use the second builder for the second mate. Previously the first builder was reused,
    // so alignment2 received tag data containing the XR tag twice and tagUtils2 was never used.
    var tagUtils2 = new TagUtils();
    tagUtils2.AddStringTag("XR", "BLABLA"); //start with random XR tag and confirm whether it is overwritten by read direction
    alignment2.AppendTagData(tagUtils2.ToBytes());

    var readPair = new ReadPair(alignment1);
    readPair.AddAlignment(alignment2);

    var alignmentResults = pairHandler.ExtractReads(readPair);
    Assert.Equal(1, alignmentResults.Count);

    var alignment = alignmentResults[0];
    Assert.Equal("FR", alignment.GetStringTag("XR"));
}
public void Execute()
{
    // Runs the realignment setup three times over the same six kinds of read (duplicate,
    // secondary, supplementary, has-supplementary, plain, indel-bearing), varying the boolean
    // and max-shift arguments passed to SetupMocksandExecute and the names each writer list expects.
    const string indelCigar = "5S5M5I5M";
    const string indelBases = "ACGTACGTACTATATAATAC";

    var duplicate = new Read("chr", CreateAlignment("Duplicate", 5, indelCigar, indelBases));
    duplicate.BamAlignment.SetIsDuplicate(true);

    var secondary = new Read("chr", CreateAlignment("NonPrimary", 5, indelCigar, indelBases));
    secondary.BamAlignment.SetIsSecondaryAlignment(true);

    var supplementary = new Read("chr", CreateAlignment("Supplementary", 5, indelCigar, indelBases));
    supplementary.BamAlignment.SetIsSupplementaryAlignment(true);

    var withSupplementary = new Read("chr", CreateAlignment("HasSupplementary", 5, indelCigar, indelBases));
    var saTag = new TagUtils();
    saTag.AddStringTag("SA", "dummy");
    withSupplementary.BamAlignment.AppendTagData(saTag.ToBytes());

    var plain = new Read("chr", CreateAlignment("PassesSuspicion", 0, "4M", "ACGT"));
    var indelRead = new Read("chr", CreateAlignment("HasIndels", 5, indelCigar, indelBases));

    // HasIndels shifts too far. Do not write it. Don't realign dups.
    var extractor = new MockExtractor(new List<Read>
    {
        CopyRead(duplicate),
        CopyRead(secondary),
        CopyRead(supplementary),
        CopyRead(withSupplementary),
        CopyRead(plain),
        CopyRead(indelRead)
    });
    var realignmentWriter = new MockRealignmentWriter(
        new List<string>(),
        new List<string>
        {
            indelRead.Name,
            secondary.Name,
            supplementary.Name,
            withSupplementary.Name,
            duplicate.Name,
            plain.Name
        });
    SetupMocksandExecute(extractor, realignmentWriter, true, 2);

    // Allow realignment of dups, and increase max shift to let hasIndels through
    extractor = new MockExtractor(new List<Read>
    {
        CopyRead(duplicate),
        CopyRead(secondary),
        CopyRead(supplementary),
        CopyRead(withSupplementary),
        CopyRead(plain),
        CopyRead(indelRead)
    });
    realignmentWriter = new MockRealignmentWriter(
        new List<string> { duplicate.Name, indelRead.Name },
        new List<string>
        {
            secondary.Name,
            supplementary.Name,
            withSupplementary.Name,
            plain.Name
        });
    SetupMocksandExecute(extractor, realignmentWriter, false, 50);

    //Don't allow dups, but make max shift big enough for hasIndels
    extractor = new MockExtractor(new List<Read>
    {
        CopyRead(duplicate),
        CopyRead(secondary),
        CopyRead(supplementary),
        CopyRead(withSupplementary),
        CopyRead(plain),
        CopyRead(indelRead)
    });
    realignmentWriter = new MockRealignmentWriter(
        new List<string> { indelRead.Name },
        new List<string>
        {
            duplicate.Name,
            secondary.Name,
            supplementary.Name,
            withSupplementary.Name,
            plain.Name
        });
    SetupMocksandExecute(extractor, realignmentWriter, true, 50);
}
/// <summary>
/// Tries to stitch the two reads of a pair. On success the stitched read is returned as a new
/// alignment with the first-mate and proper-pair flags cleared, the stitched CIGAR applied when
/// present, and an XD tag added when CIGAR directions are available. On failure both partner
/// reads are returned as new alignments unless unstitchable pairs are being filtered, in which
/// case nothing is returned. A status count is recorded for each unstitchable pair.
/// </summary>
/// <param name="pair">Pair whose Read1 and Read2 are stitched; their RefIDs are looked up in the ref-id mapping.</param>
/// <returns>The alignments to emit for this pair (possibly empty).</returns>
/// <exception cref="Exception">Stitching succeeded but produced more than one read for processing.</exception>
public List<BamAlignment> ExtractReads(ReadPair pair)
{
    var output = new List<BamAlignment>();

    var chromRead1 = _refIdMapping[pair.Read1.RefID];
    var chromRead2 = _refIdMapping[pair.Read2.RefID];

    var set = new AlignmentSet(
        new Read(chromRead1, pair.Read1),
        new Read(chromRead2, pair.Read2),
        false);

    bool wasStitched = _stitcher.TryStitch(set);

    if (!wasStitched)
    {
        if (_filterUnstitchablePairs)
        {
            _statusCounter.AddStatusCount("Unstitchable Pairs Filtered");
        }
        else
        {
            _statusCounter.AddStatusCount("Unstitchable Pairs Kept");
            output.Add(new BamAlignment(set.PartnerRead1.BamAlignment));
            output.Add(new BamAlignment(set.PartnerRead2.BamAlignment));
        }

        return output;
    }

    if (set.ReadsForProcessing.Count > 1)
    {
        throw new Exception("AlignmentSets for stitched reads should only have one ReadsForProcessing.");
    }

    foreach (var stitchedRead in set.ReadsForProcessing)
    {
        var stitchedAlignment = new BamAlignment(stitchedRead.BamAlignment);
        stitchedAlignment.SetIsFirstMate(false);
        stitchedAlignment.SetIsProperPair(false);

        var xdTag = new TagUtils();

        if (stitchedRead.StitchedCigar != null)
        {
            stitchedAlignment.CigarData = stitchedRead.StitchedCigar;
        }

        if (stitchedRead.CigarDirections != null)
        {
            xdTag.AddStringTag("XD", stitchedRead.CigarDirections.ToString());
        }

        // Append to existing tag data when there is any; otherwise the new bytes become the tag data.
        var xdBytes = xdTag.ToBytes();
        if (stitchedAlignment.TagData != null)
        {
            stitchedAlignment.AppendTagData(xdBytes);
        }
        else
        {
            stitchedAlignment.TagData = xdBytes;
        }

        output.Add(stitchedAlignment);
    }

    return output;
}
public void TryPair()
{
    // Exercises StitcherPairFilter.TryPair / GetFlushableUnpairedReads for: a normal pair whose
    // first read has an SA tag, a singleton, an improper pair, a set of low-map-quality
    // combinations (via LowMapQualityTest), and a non-overlapping pair.
    var dupIdentifier = new Mock<IDuplicateIdentifier>();
    var filter = new StitcherPairFilter(false, false, dupIdentifier.Object, new ReadStatusCounter());

    // First alignment the filter sees: not paired yet, hold on to it
    var firstInPair = CreateAlignment("abc");
    var tagUtils = new TagUtils();
    tagUtils.AddStringTag("SA", "chr1,100,+,3M,50,1");
    firstInPair.TagData = tagUtils.ToBytes();
    var pair = filter.TryPair(firstInPair);
    Assert.Null(pair);

    // Found the original alignment's pair -> return the pair, even though it has a supplementary which has not yet been encountered
    var secondInPair = CreateAlignment("abc");
    pair = filter.TryPair(secondInPair);
    Assert.NotNull(pair);

    // There should be nothing unpaired
    Assert.Equal(0, filter.GetFlushableUnpairedReads().Count());

    // Add another one and don't give it a pair (i.e. a singleton), there should be a single unpaired
    pair = filter.TryPair(CreateAlignment("singleton"));
    Assert.Null(pair);
    Assert.Equal(1, filter.GetFlushableUnpairedReads().Count());

    // Improper pairs should be treated as incomplete (like a singleton)
    Assert.Equal(0, filter.GetFlushableUnpairedReads().Count());
    var improperPairRead1 = CreateAlignment("improper", false);
    var improperPairRead2 = CreateAlignment("improper", false);
    pair = filter.TryPair(improperPairRead1);
    Assert.Null(pair);
    pair = filter.TryPair(improperPairRead2);
    Assert.Null(pair);

    // Should be able to get both of these reads back with unpaired
    Assert.Equal(2, filter.GetFlushableUnpairedReads().Count());

    //
    // LowMapQ filter tests both the filterPairLowMapQ and the minmapquality setting
    // If filterPairLowMapQ == true, and one or both reads below mapQ both read pairs should be filtered and have 0 FlushableUnpairedReads
    // If filterPairLowMapQ == false, and one read below mapQ, only one read should be filtered and have 1 FlushableUnpairedReads
    //

    // HappyCase, both reads mapQ = 30, filterPair on, Stitched so 0 flushable reads, minMapQ = 20
    LowMapQualityTest(30, 30, true, 0, 20, true);

    // r1MapQ = 10, r2MapQ = 30, filterPair on, expected to throw out both reads, minMapQ = 20)
    LowMapQualityTest(10, 30, true, 0, 20, false);

    // r1MapQ = 30, r2MapQ = 10, filterPair on, expected to throw out both reads, minMapQ = 20)
    // (arguments now match the comment; previously this repeated the 10/30 case above)
    LowMapQualityTest(30, 10, true, 0, 20, false);

    // r1MapQ = 30, r2MapQ = 10, filterPair off, expected to throw out 1 read, minMapQ = 20)
    // (arguments now match the comment; previously this duplicated the 10/30 case below)
    LowMapQualityTest(30, 10, false, 1, 20, false);

    // r1MapQ = 10, r2MapQ = 30, filterPair off, expected to throw out 1 read, minMapQ = 20)
    LowMapQualityTest(10, 30, false, 1, 20, false);

    // r1MapQ = 3, r2MapQ = 20, filterPair on, expected to throw out both reads, minMapQ = 5)
    LowMapQualityTest(3, 20, true, 0, 5, false);

    // r1MapQ = 10, r2MapQ = 10, filterPair on, expected to throw out both reads, minMapQ = 20)
    LowMapQualityTest(10, 10, true, 0, 20, false);

    // r1MapQ = 10, r2MapQ = 10, filterPair off, expected to throw out both reads, minMapQ = 20)
    LowMapQualityTest(10, 10, false, 0, 20, false);

    // r1MapQ = 19, r2MapQ = 20, filterPair on, expected to throw out both reads, minMapQ = 20)
    LowMapQualityTest(19, 20, true, 0, 20, false);

    // r1MapQ = 19, r2MapQ = 20, filterPair off, expected to throw out 1 reads, minMapQ = 20)
    LowMapQualityTest(19, 20, false, 1, 20, false);

    // Non-overlapping pairs should be treated as incomplete (like a singleton)
    Assert.Equal(0, filter.GetFlushableUnpairedReads().Count());
    var nonOverlappingRead1 = CreateAlignment("noOverlap", true, 1, "3M2I");
    var nonOverlappingRead2 = CreateAlignment("noOverlap", true, 4, "2I4M");
    pair = filter.TryPair(nonOverlappingRead1);
    Assert.Null(pair);
    pair = filter.TryPair(nonOverlappingRead2);
    Assert.Null(pair);

    // Should be able to get both of these reads back with unpaired
    Assert.Equal(2, filter.GetFlushableUnpairedReads().Count());
}