/// <summary>
/// Walks a CIGAR and records, for every read base, the reference position it maps to.
/// </summary>
/// <param name="position">Reference position assigned to the first reference-spanning read base.</param>
/// <param name="cigarData">CIGAR to walk. When null, no entries are written.</param>
/// <param name="positionMapArray">Backing array, wrapped in a <c>PositionMap</c> and updated per read base.</param>
/// <param name="differentiateSoftClip">When true, soft-clipped ('S') read bases are written as -2 instead of -1.</param>
/// <param name="readName">Read name forwarded to CIGAR validation.</param>
public static void UpdatePositionMap(int position, CigarAlignment cigarData, int[] positionMapArray, bool differentiateSoftClip = false, string readName = null)
{
    var positionMap = new PositionMap(positionMapArray);

    // The original validated only when the CIGAR was non-null but then dereferenced it
    // unconditionally, so a null CIGAR crashed with a NullReferenceException. Treat it as "nothing to map".
    if (cigarData == null)
    {
        return;
    }

    ValidateCigar(cigarData, positionMap.Length, readName);

    var readIndex = 0;
    var referencePosition = position;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var readSpan = operation.IsReadSpan();
        var refSpan = operation.IsReferenceSpan();

        // Read bases that do not span the reference get a sentinel instead of a position.
        var unmappedValue = differentiateSoftClip && operation.Type == 'S' ? -2 : -1;

        for (var opIndex = 0; opIndex < operation.Length; opIndex++)
        {
            if (readSpan)
            {
                var mappedValue = refSpan ? referencePosition++ : unmappedValue;
                positionMap.UpdatePositionAtIndex(readIndex, mappedValue, true);
                readIndex++;
            }
            else if (refSpan)
            {
                // Reference-only operation: advances the reference without consuming a read base.
                referencePosition++;
            }
        }
    }
}
/// <summary>
/// Creates a stitching result whose cigar, directions, bases and qualities all start out empty.
/// </summary>
public StitchingInfo()
{
    // Alignment-level containers.
    StitchedCigar = new CigarAlignment();
    StitchedDirections = new CigarDirection();

    // Per-base containers.
    StitchedBases = new List<char>();
    StitchedQualities = new List<byte>();
}
/// <summary>
/// Walks a CIGAR and writes, for every read base, the reference position it maps to.
/// </summary>
/// <param name="position">Reference position assigned to the first reference-spanning read base.</param>
/// <param name="cigarData">CIGAR to walk. When null, <paramref name="positionMap"/> is left untouched.</param>
/// <param name="positionMap">Array indexed by read base; receives a reference position, or -1 / -2 for read bases that do not span the reference.</param>
/// <param name="differentiateSoftClip">When true, soft-clipped ('S') read bases are written as -2 instead of -1.</param>
public static void UpdatePositionMap(int position, CigarAlignment cigarData, int[] positionMap, bool differentiateSoftClip = false)
{
    // The original validated only when the CIGAR was non-null but then dereferenced it
    // unconditionally, so a null CIGAR crashed with a NullReferenceException. Treat it as "nothing to map".
    if (cigarData == null)
    {
        return;
    }

    ValidateCigar(cigarData, positionMap.Length);

    var readIndex = 0;
    var referencePosition = position;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var readSpan = operation.IsReadSpan();
        var refSpan = operation.IsReferenceSpan();

        // Read bases that do not span the reference get a sentinel instead of a position.
        var unmappedValue = differentiateSoftClip && operation.Type == 'S' ? -2 : -1;

        for (var opIndex = 0; opIndex < operation.Length; opIndex++)
        {
            if (readSpan)
            {
                positionMap[readIndex] = refSpan ? referencePosition++ : unmappedValue;
                readIndex++;
            }
            else if (refSpan)
            {
                // Reference-only operation: advances the reference without consuming a read base.
                referencePosition++;
            }
        }
    }
}
/// <summary>
/// Resets the per-base direction and position maps from the current BAM alignment and,
/// when stitching is enabled, loads the stitched cigar from the "XC" tag.
/// </summary>
/// <param name="stitchingEnabled">When true, the "XC" tag (if present) is parsed into <c>StitchedCigar</c>.</param>
private void UpdateFromBam(bool stitchingEnabled = false)
{
    // Direction map: every base takes the strand of the alignment.
    if (DirectionMap == null || DirectionMap.Length != ReadLength)
    {
        DirectionMap = new DirectionType[ReadLength];
    }

    var strandDirection = BamAlignment.IsReverseStrand() ? DirectionType.Reverse : DirectionType.Forward;
    for (var baseIndex = 0; baseIndex < DirectionMap.Length; baseIndex++)
    {
        DirectionMap[baseIndex] = strandDirection;
    }

    // Position map: start with every base at -1, then let the cigar fill in positions.
    if (PositionMap == null || PositionMap.Length != ReadLength)
    {
        PositionMap = new int[ReadLength];
    }

    for (var baseIndex = 0; baseIndex < PositionMap.Length; baseIndex++)
    {
        PositionMap[baseIndex] = -1;
    }

    UpdateMapFromCigar();

    if (!stitchingEnabled || BamAlignment.TagData == null || BamAlignment.TagData.Length <= 0)
    {
        return;
    }

    var stitchedCigarTag = BamAlignment.GetStringTag(BamAlignment.TagData, "XC");
    if (stitchedCigarTag != null)
    {
        StitchedCigar = new CigarAlignment(stitchedCigarTag);
    }
}
/// <summary>
/// Expands a cigar and its directions to one entry per base, each entry being the
/// operation character followed by the first character of the direction name.
/// </summary>
/// <param name="cigar">Cigar whose operations are expanded to one entry per unit of length.</param>
/// <param name="directions">Direction runs, expanded the same way and paired with the cigar by index.</param>
/// <returns>One two-character string per expanded cigar base.</returns>
private List<string> ExpandCigar(CigarAlignment cigar, CigarDirection directions)
{
    var expandedDirections = new List<DirectionType>();
    foreach (var direction in directions.Directions)
    {
        for (var i = 0; i < direction.Length; i++)
        {
            expandedDirections.Add(direction.Direction);
        }
    }

    // Build the combined entries in one pass. The original created an intermediate list of
    // one-character strings, declared an unused counter, and round-tripped the op type
    // through ToString()[0].ToString().
    var expandedCigarDirection = new List<string>();
    var baseIndex = 0;
    foreach (CigarOp op in cigar)
    {
        var opType = op.Type.ToString();
        for (var i = 0; i < op.Length; i++)
        {
            // As before, indexing throws if the directions cover fewer bases than the cigar.
            expandedCigarDirection.Add(opType + expandedDirections[baseIndex].ToString()[0]);
            baseIndex++;
        }
    }

    return expandedCigarDirection;
}
/// <summary>
/// Checks that HasInternalSoftclip reports soft clips only when they are not at either end of the cigar.
/// </summary>
public void HasInternalSoftclip()
{
    // Soft clips only at the ends, or no soft clips at all.
    var cigarsWithoutInternalSoftclip = new[]
    {
        "2S3M2S",
        "3M1I1M",
        "3M1D2I1M",
        "1S3M1D1I1M"
    };

    // At least one soft clip that is not the first or last operation.
    var cigarsWithInternalSoftclip = new[]
    {
        "2S3M1S1M1S",
        "3M1S1M",
        "3M1D2S1M",
        "1S3M1D1S1M"
    };

    foreach (var cigarString in cigarsWithoutInternalSoftclip)
    {
        Assert.False(new CigarAlignment(cigarString).HasInternalSoftclip());
    }

    foreach (var cigarString in cigarsWithInternalSoftclip)
    {
        Assert.True(new CigarAlignment(cigarString).HasInternalSoftclip());
    }
}
/// <summary>
/// Verifies that a read's cigar is compatible with the leading operations of the stitched cigar.
/// </summary>
/// <param name="stitchedCigar">Cigar of the stitched read.</param>
/// <param name="startCigar">Cigar of the individual read, compared operation by operation from the start.</param>
/// <exception cref="ApplicationException">Thrown when the two cigars cannot be reconciled.</exception>
private static void ValidateCigar(CigarAlignment stitchedCigar, CigarAlignment startCigar)
{
    var lastIndex = startCigar.Count - 1;

    // An empty read cigar, or a stitched cigar with fewer operations than the read cigar,
    // can never match. The original indexed out of range in both cases instead of
    // reporting the mismatch.
    var mismatch = lastIndex < 0 || stitchedCigar.Count < startCigar.Count;

    if (!mismatch)
    {
        // Every operation before the last must agree exactly in type and length.
        for (var i = 0; i < lastIndex; i++)
        {
            var stitchedOp = stitchedCigar[i];
            var operation = startCigar[i];

            if (stitchedOp.Type != operation.Type || stitchedOp.Length != operation.Length)
            {
                mismatch = true;
                break;
            }
        }
    }

    if (!mismatch)
    {
        // The last operation is checked separately; it is a mismatch if:
        // - same type, but the read's operation is longer than the stitched one
        // - different type, and the read's operation is not a soft clip
        // - different type, the read's operation is a soft clip, but the stitched type is not I
        var lastStitchedOp = stitchedCigar[lastIndex];
        var lastOperation = startCigar[lastIndex];

        if (lastStitchedOp.Type == lastOperation.Type)
        {
            mismatch = lastStitchedOp.Length < lastOperation.Length;
        }
        else
        {
            mismatch = lastOperation.Type != 'S' || lastStitchedOp.Type != 'I';
        }
    }

    if (mismatch)
    {
        throw new ApplicationException(String.Format("Unable to stitch: mismatch between stitched '{0}' and read '{1}' cigar", stitchedCigar, startCigar));
    }
}
/// <summary>
/// Exercises the CigarAlignment constructors: parameterless, well-formed string,
/// empty string, the "*" placeholder, and malformed strings.
/// </summary>
public void CigarAlignment_Constructor()
{
    // Parameterless constructor yields no operations.
    Assert.Equal(0, new CigarAlignment().Count);

    // Well-formed string is parsed into its operations, in order.
    var parsed = new CigarAlignment("5M6I");
    Assert.Equal(2, parsed.Count);
    Assert.Equal(new CigarOp('M', 5), parsed[0]);
    Assert.Equal(new CigarOp('I', 6), parsed[1]);

    // Empty string yields no operations.
    Assert.Equal(0, new CigarAlignment("").Count);

    // The "*" placeholder yields no operations.
    Assert.Equal(0, new CigarAlignment("*").Count);

    // Malformed strings are rejected: unknown operation type, then a length with no operation.
    Assert.Throws<Exception>(() => new CigarAlignment("6Y"));
    Assert.Throws<Exception>(() => new CigarAlignment("10"));
}
/// <summary>
/// Computes the overlap between two reads, expressed as index ranges within each read.
/// Read1's range covers its last <c>OverlapLength</c> bases; Read2's range covers its first <c>OverlapLength</c> bases.
/// </summary>
/// <param name="read1">First read of the pair.</param>
/// <param name="read2">Second read of the pair.</param>
/// <param name="stitchedCigar">Cigar string of the stitched read; its read span gives the stitched length.</param>
/// <returns>The overlap length and the matching index boundaries in each read.</returns>
/// <exception cref="ReadsNotStitchableException">Thrown when the computed overlap is zero or negative.</exception>
public static OverlapBoundary GetOverlapBoundary(Read read1, Read read2, string stitchedCigar)
{
    var stitchedReadSpan = new CigarAlignment(stitchedCigar).GetReadSpan();
    var read1Length = read1.Sequence.Length;
    var read2Length = read2.Sequence.Length;

    // Bases counted twice across the two reads are the overlapping ones.
    var overlapLength = read1Length + read2Length - (int)stitchedReadSpan;
    if (overlapLength <= 0)
    {
        throw new ReadsNotStitchableException(string.Format("No overlap between reads {0} and {1}", read1.ToString(), read2.ToString()));
    }

    // Simplifying assumption: the overlap reaches exactly as far back into R1 as it does forward into R2.
    var read1Boundary = new ReadIndexBoundary();
    read1Boundary.StartIndex = read1.Sequence.Length - overlapLength;
    read1Boundary.EndIndex = read1.Sequence.Length - 1;

    var read2Boundary = new ReadIndexBoundary();
    read2Boundary.StartIndex = 0;
    read2Boundary.EndIndex = overlapLength - 1;

    var boundary = new OverlapBoundary();
    boundary.OverlapLength = overlapLength;
    boundary.Read1 = read1Boundary;
    boundary.Read2 = read2Boundary;
    return boundary;
}
/// <summary>
/// Returns a reversed copy of the cigar; the copy is built with the CigarAlignment copy constructor
/// and reversed in place.
/// </summary>
/// <param name="cigar">Cigar to copy.</param>
/// <returns>A new <c>CigarAlignment</c> on which <c>Reverse()</c> has been called.</returns>
public static CigarAlignment GetReverse(this CigarAlignment cigar)
{
    var reversedCopy = new CigarAlignment(cigar);

    reversedCopy.Reverse();

    return reversedCopy;
}
/// <summary>
/// Builds a placeholder merged read for a pair: all bases are 'N' with quality 0, the cigar is an
/// optional soft clip, a single match block and an optional soft clip, and the direction string marks
/// the region between the two single-read flanks as "S".
/// </summary>
/// <param name="set">Alignment set providing PartnerRead1 and PartnerRead2.</param>
/// <param name="useSoftclippedBases">When true, read 2's clip-adjusted position (rather than its position) marks where the overlap begins.</param>
/// <returns>The merged read, with <c>StitchedCigar</c> and <c>CigarDirections</c> populated.</returns>
public Read GenerateNifiedMergedRead(AlignmentSet set, bool useSoftclippedBases)
{
    var firstRead = set.PartnerRead1;
    var secondRead = set.PartnerRead2;

    // Right-most extent of each read, including any insertion at its end.
    var firstAdjustedEnd = firstRead.ClipAdjustedEndPosition + firstRead.CigarData.GetSuffixInsertionLength();
    var secondAdjustedEnd = secondRead.ClipAdjustedEndPosition + secondRead.CigarData.GetSuffixInsertionLength();

    var firstReachesFurther = secondAdjustedEnd < firstAdjustedEnd;
    var rightmostEnd = firstReachesFurther ? firstAdjustedEnd : secondAdjustedEnd;
    var nifiedStitchedLength = rightmostEnd + 1 - firstRead.ClipAdjustedPosition;

    // Prefix clip always comes from read 1; suffix clip comes from whichever read reaches further,
    // or the smaller of the two when both end at the same place.
    var prefixClip = firstRead.CigarData.GetPrefixClip();
    var suffixClip = firstReachesFurther
        ? firstRead.CigarData.GetSuffixClip()
        : secondRead.CigarData.GetSuffixClip();
    if (secondAdjustedEnd == firstAdjustedEnd)
    {
        suffixClip = Math.Min(firstRead.CigarData.GetSuffixClip(), secondRead.CigarData.GetSuffixClip());
    }

    var prefixClipText = prefixClip > 0 ? string.Format("{0}S", prefixClip) : "";
    var matchText = string.Format("{0}M", nifiedStitchedLength - prefixClip - suffixClip);
    var suffixClipText = suffixClip > 0 ? string.Format("{0}S", suffixClip) : "";
    var nifiedStitchedCigar = new CigarAlignment(prefixClipText + matchText + suffixClipText);

    // Lengths of the single-read flanks on either side of the overlap.
    var secondStart = useSoftclippedBases ? secondRead.ClipAdjustedPosition : secondRead.Position;
    var beforeOverlap = secondStart - firstRead.ClipAdjustedPosition;
    var afterOverlap = firstReachesFurther
        ? (firstAdjustedEnd - secondAdjustedEnd)
        : (secondAdjustedEnd - firstAdjustedEnd);

    // The leading flank takes read 1's direction; the trailing flank takes the direction of
    // whichever read reaches further (read 2 being the opposite of read 1).
    var r1Forward = firstRead.SequencedBaseDirectionMap.First() == DirectionType.Forward;
    var beforeOverlapDirection = r1Forward ? "F" : "R";
    string afterOverlapDirection;
    if (firstReachesFurther)
    {
        afterOverlapDirection = r1Forward ? "F" : "R";
    }
    else
    {
        afterOverlapDirection = r1Forward ? "R" : "F";
    }

    var leadingDirectionText = beforeOverlap > 0 ? string.Format("{0}{1}", beforeOverlap, beforeOverlapDirection) : "";
    var overlapDirectionText = string.Format("{0}S", nifiedStitchedLength - beforeOverlap - afterOverlap);
    var trailingDirectionText = afterOverlap > 0 ? string.Format("{0}{1}", afterOverlap, afterOverlapDirection) : "";
    var nifiedStitchedDirections = leadingDirectionText + overlapDirectionText + trailingDirectionText;

    var mergedAlignment = new BamAlignment();
    mergedAlignment.Name = firstRead.Name;
    mergedAlignment.Bases = new string('N', nifiedStitchedLength);
    mergedAlignment.Position = Math.Min(firstRead.Position - 1, secondRead.Position - 1);
    mergedAlignment.Qualities = Enumerable.Repeat((byte)0, nifiedStitchedLength).ToArray();
    mergedAlignment.CigarData = nifiedStitchedCigar;

    var mergedRead = new Read(firstRead.Chromosome, mergedAlignment);
    mergedRead.StitchedCigar = nifiedStitchedCigar;
    mergedRead.CigarDirections = new CigarDirection(nifiedStitchedDirections);
    return mergedRead;
}
/// <summary>
/// Test helper: builds a <c>Read</c> on the given chromosome from a BAM alignment produced by <c>CreateBamAlignment</c>.
/// </summary>
/// <param name="chr">Chromosome name passed to the <c>Read</c> constructor.</param>
/// <param name="sequence">Read bases.</param>
/// <param name="position">Read position, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="cigar">Optional cigar, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="qualities">Optional per-base qualities, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="matePosition">Mate position, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="qualityForAll">Single quality value, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="isReverseMapped">Strand flag, forwarded to <c>CreateBamAlignment</c>.</param>
/// <param name="mapQ">Mapping quality, forwarded to <c>CreateBamAlignment</c>.</param>
/// <returns>The constructed read.</returns>
public static Read CreateRead(string chr, string sequence, int position, CigarAlignment cigar = null, byte[] qualities = null, int matePosition = 0, byte qualityForAll = 30, bool isReverseMapped = false, uint mapQ = 30)
{
    return new Read(
        chr,
        CreateBamAlignment(sequence, position, matePosition, qualityForAll, isReverseMapped, mapQ, qualities, cigar));
}
/// <summary>
/// Lays both read cigars out on shared positions and reconciles them into a single stitched result.
/// </summary>
/// <param name="cigar1">Cigar of read 1.</param>
/// <param name="pos1">Position of read 1.</param>
/// <param name="cigar2">Cigar of read 2.</param>
/// <param name="pos2">Position of read 2.</param>
/// <param name="reverseFirst">Forwarded to <c>ReconcileSites</c>.</param>
/// <param name="pairIsOutie">Selects which read supplies the prefix clip and the clipped end, and is forwarded to <c>ReconcileSites</c>.</param>
/// <returns>The stitching info, or null when <c>ReconcileSites</c> reports failure.</returns>
public StitchingInfo GetStitchedCigar(CigarAlignment cigar1, int pos1, CigarAlignment cigar2, int pos2, bool reverseFirst, bool pairIsOutie)
{
    var stitchedSites = GetStitchedSites(cigar1, cigar2, pos2, pos1);

    // For an outie pair the roles of the two reads are swapped.
    int prefixClip;
    int clippedEnd;
    if (pairIsOutie)
    {
        prefixClip = (int)cigar2.GetPrefixClip();
        clippedEnd = (int)(cigar1.GetReadSpan() - (int)cigar1.GetSuffixClip());
    }
    else
    {
        prefixClip = (int)cigar1.GetPrefixClip();
        clippedEnd = (int)(cigar2.GetReadSpan() - (int)cigar2.GetSuffixClip());
    }

    bool reconciled;
    var stitchingInfo = ReconcileSites(stitchedSites, reverseFirst, out reconciled, prefixClip, clippedEnd, pairIsOutie);

    if (!reconciled)
    {
        return null;
    }

    return stitchingInfo;
}
/// <summary>
/// Checks that both individual read cigars are consistent with the stitched cigar:
/// read 1 against the stitched cigar as-is, read 2 against it with both cigars reversed.
/// </summary>
/// <param name="stitchedCigar">Cigar of the stitched read; must be supported.</param>
/// <param name="read1Cigar">Cigar of read 1.</param>
/// <param name="read2Cigar">Cigar of read 2.</param>
/// <exception cref="Exception">Thrown when the stitched cigar is not supported.</exception>
protected static void ValidateCigar(CigarAlignment stitchedCigar, CigarAlignment read1Cigar, CigarAlignment read2Cigar)
{
    var stitchedIsSupported = stitchedCigar.IsSupported();
    if (stitchedIsSupported == false)
    {
        throw new Exception(String.Format("Unsupported cigar: {0}", stitchedCigar));
    }

    // Read 1 is compared from the start of the stitched cigar.
    ValidateCigar(stitchedCigar, read1Cigar);

    // Read 2 is compared from the end, so reverse both sides first.
    var reversedStitched = stitchedCigar.GetReverse();
    var reversedRead2 = read2Cigar.GetReverse();
    ValidateCigar(reversedStitched, reversedRead2);
}
/// <summary>
/// Tells whether the operation at a given index of the cigar has the given type.
/// </summary>
/// <param name="cigar">Cigar to inspect; null yields false.</param>
/// <param name="index">Operation index, counted from the start, or from the end when <paramref name="fromEnd"/> is true.</param>
/// <param name="type">Operation type to look for.</param>
/// <param name="fromEnd">When true, index 0 refers to the last operation.</param>
/// <returns>True when the index is in range and the operation there has the requested type; otherwise false.</returns>
public static bool HasOperationAtOpIndex(this CigarAlignment cigar, int index, char type, bool fromEnd = false)
{
    if (cigar == null)
    {
        return false;
    }

    int opIndex;
    if (fromEnd)
    {
        opIndex = cigar.Count - index - 1;
    }
    else
    {
        opIndex = index;
    }

    // Out-of-range indices are not an error; they simply do not match.
    if (opIndex >= cigar.Count || opIndex < 0)
    {
        return false;
    }

    return cigar[opIndex].Type == type;
}
/// <summary>
/// Converts qualifying insertions at the ends of a realigned read into soft clips, then refreshes
/// the result's alignment summary counters.
/// </summary>
/// <remarks>
/// Assumptions carried by the code: the cigar contains no soft clips at this point (one is rejected with an
/// exception), and <paramref name="indels"/> holds the same indels that are represented in <c>result.Cigar</c>.
/// </remarks>
/// <param name="indels">Indels applied to the read; the first and last are evaluated against the cigar's end operations.</param>
/// <param name="read">Read whose sequence is used for the summary.</param>
/// <param name="refSequence">Reference sequence used for the summary.</param>
/// <param name="result">Realignment result; its cigar and counters are updated in place.</param>
/// <param name="refSequenceStartIndex">Offset subtracted from the result position when computing the summary.</param>
/// <exception cref="InvalidDataException">Thrown when the cigar already contains a soft clip.</exception>
public void MaskPartialInsertion(HashableIndel[] indels, Read read, string refSequence, RealignmentResult result, int refSequenceStartIndex = 0)
{
    var firstIndel = indels[0];
    var lastIndel = indels[indels.Length - 1];

    var endsWithInsertion = firstIndel.Type == AlleleCategory.Insertion || lastIndel.Type == AlleleCategory.Insertion;
    var maskingConfigured = _minimumUnanchoredInsertionLength > 0 || _maskPartialInsertion;

    if (endsWithInsertion && maskingConfigured)
    {
        var maskedCigar = new CigarAlignment();
        var lastOpIndex = result.Cigar.Count - 1;

        for (var opIndex = 0; opIndex < result.Cigar.Count; opIndex++)
        {
            var op = result.Cigar[opIndex];

            if (op.Type == 'S')
            {
                throw new InvalidDataException(
                    string.Format(
                        "Found an unexpected cigar type [{0}] in CIGAR string {1} before re-softclipping",
                        op.Type,
                        result.Cigar));
            }

            // Only the first and last operations are candidates for masking.
            var maskThisOp =
                (opIndex == 0 && EvaluateInsertionAtReadEnds(op, firstIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion))
                || (opIndex == lastOpIndex && EvaluateInsertionAtReadEnds(op, lastIndel, _minimumUnanchoredInsertionLength, _maskPartialInsertion));

            if (maskThisOp)
            {
                maskedCigar.Add(new CigarOp('S', op.Length));
            }
            else
            {
                maskedCigar.Add(op);
            }
        }

        maskedCigar.Compress();
        result.Cigar = maskedCigar;
    }

    // The counters are recomputed whether or not the cigar was changed.
    var summary = Extensions.GetAlignmentSummary(
        result.Position - 1 - refSequenceStartIndex,
        result.Cigar,
        refSequence,
        read.Sequence,
        _trackActualMismatches,
        _checkSoftclipsForMismatches);

    result.NumIndels = summary.NumIndels;
    result.NumNonNMismatches = summary.NumNonNMismatches;
    result.NumNonNSoftclips = summary.NumNonNSoftclips;
    result.NumSoftclips = summary.NumSoftclips;
    result.NumMismatchesIncludeSoftclip = summary.NumMismatchesIncludeSoftclip;
    result.NumIndelBases = summary.NumIndelBases;
    result.NumInsertedBases = summary.NumInsertedBases;
}
/// <summary>
/// Test helper: builds a "chr1" read at BAM position 99 with the given cigar and bases.
/// </summary>
/// <param name="cigarString">Cigar string parsed into the alignment's cigar.</param>
/// <param name="sequence">Bases of the read.</param>
/// <returns>The constructed read.</returns>
private Read GetReadWithSequence(string cigarString, string sequence)
{
    var alignment = new BamAlignment();
    alignment.Position = 99; // zero index for bam alignment
    alignment.CigarData = new CigarAlignment(cigarString);
    alignment.Bases = sequence;

    return new Read("chr1", alignment);
}
/// <summary>
/// Test helper: builds a "chr1" read at BAM position 99 whose bases are 'A', optionally flanked by runs of 'N',
/// with a total length equal to the cigar's read span.
/// </summary>
/// <param name="cigarString">Cigar string; its read span determines the total number of bases.</param>
/// <param name="prefixNs">Number of 'N' bases at the start.</param>
/// <param name="suffixNs">Number of 'N' bases at the end.</param>
/// <returns>The constructed read.</returns>
private Read GetTestRead(string cigarString, int prefixNs = 0, int suffixNs = 0)
{
    var cigarData = new CigarAlignment(cigarString);

    var leadingNs = new string('N', prefixNs);
    var middleAs = new string('A', (int)cigarData.GetReadSpan() - prefixNs - suffixNs);
    var trailingNs = new string('N', suffixNs);

    var alignment = new BamAlignment();
    alignment.Position = 99; // zero index for bam alignment
    alignment.CigarData = cigarData;
    alignment.Bases = leadingNs + middleAs + trailingNs;

    return new Read("chr1", alignment);
}
/// <summary>
/// Loads the stitched cigar from the alignment's "XC" tag, when tag data and the tag are present,
/// and marks the stitched cigar as initialized either way.
/// </summary>
private void GetStitchedCigarFromBam()
{
    var hasTagData = BamAlignment.TagData != null && BamAlignment.TagData.Length > 0;

    if (hasTagData)
    {
        var stitchedCigarTag = BamAlignment.GetStringTag("XC");
        if (stitchedCigarTag != null)
        {
            _stitchedCigar = new CigarAlignment(stitchedCigarTag);
        }
    }

    // Set even when no tag was found, so callers can tell the lookup has already happened.
    _stitchedCigarInitialized = true;
}
/// <summary>
/// Tells whether the cigar contains a soft clip ('S') anywhere other than as a leading or trailing clip.
/// </summary>
/// <param name="cigar">Cigar to inspect.</param>
/// <returns>True when a soft-clip operation is found in the sub-cigar that excludes the end clips.</returns>
public static bool HasInternalSoftclip(this CigarAlignment cigar)
{
    // Skip the first operation when there is a prefix clip, and the last when there is a suffix clip.
    var firstOpIndex = cigar.GetPrefixClip() > 0 ? 1 : 0;
    var endOpIndex = cigar.Count - (cigar.GetSuffixClip() > 0 ? 1 : 0);
    var interior = cigar.GetSubCigar(firstOpIndex, endOpIndex);

    var foundSoftclip = false;
    foreach (CigarOp op in interior)
    {
        if (op.Type == 'S')
        {
            foundSoftclip = true;
            break;
        }
    }

    return foundSoftclip;
}
/// <summary>
/// Exercises HasOperationAtOpIndex from the start and from the end of a cigar, with a null cigar,
/// and with out-of-range indices.
/// </summary>
public void HasOperationAtOpIndex()
{
    var cigar = new CigarAlignment("5M3D4M7I2S8M");

    // Counting from the start.
    Assert.True(cigar.HasOperationAtOpIndex(3, 'I'));
    Assert.False(cigar.HasOperationAtOpIndex(5, 'D'));

    // A null cigar never matches, in either direction.
    CigarAlignment missingCigar = null;
    Assert.False(CigarExtensions.HasOperationAtOpIndex(missingCigar, 3, 'D'));
    Assert.False(CigarExtensions.HasOperationAtOpIndex(missingCigar, 3, 'D', true));

    // Counting from the end.
    Assert.True(cigar.HasOperationAtOpIndex(1, 'S', true));
    Assert.False(cigar.HasOperationAtOpIndex(2, 'D', true));

    // Indices outside the cigar do not match.
    Assert.False(cigar.HasOperationAtOpIndex(-1, 'D', true));
    Assert.False(cigar.HasOperationAtOpIndex(8, 'D', true));
}
/// <summary>
/// Checks IsSupported on a cigar made of M/D/I/S operations and on one that contains a 'U' operation.
/// </summary>
public void ValidateCigarAignment()
{
    // Valid cigar string.
    var supported = new CigarAlignment("5M3D4M7I2S8M");
    Assert.True(supported.IsSupported());

    // Invalid cigar: assembled operation by operation so that it can include the 'U' type.
    var operations = new[]
    {
        new CigarOp('M', 5),
        new CigarOp('U', 7),
        new CigarOp('I', 3),
        new CigarOp('M', 7)
    };

    var unsupported = new CigarAlignment();
    foreach (var operation in operations)
    {
        unsupported.Add(operation);
    }

    Assert.False(unsupported.IsSupported());
}
/// <summary>
/// Rejects cigars that consist of a single indel, or whose read span differs from the read length.
/// </summary>
/// <param name="cigarData">Cigar to check.</param>
/// <param name="readLength">Expected read span of a non-empty cigar.</param>
/// <exception cref="Exception">Thrown for a lone 'I'/'D' operation or a read-span mismatch.</exception>
private static void ValidateCigar(CigarAlignment cigarData, int readLength)
{
    if (cigarData.Count == 1)
    {
        var onlyOpType = cigarData[0].Type;
        if (onlyOpType == 'I' || onlyOpType == 'D')
        {
            throw new Exception(string.Format("Invalid cigar '{0}': indel must have anchor", cigarData));
        }
    }

    // An empty cigar is not checked against the read length.
    if (cigarData.Count <= 0 || cigarData.GetReadSpan() == readLength)
    {
        return;
    }

    throw new Exception(string.Format("Invalid cigar '{0}': does not match length {1} of read", cigarData, readLength));
}
/// <summary>
/// Test helper: builds a <c>Read</c> from a sequence and position. Position and mate position are each
/// reduced by one before being stored on the BAM alignment.
/// </summary>
/// <param name="chr">Chromosome name passed to the <c>Read</c> constructor.</param>
/// <param name="sequence">Read bases.</param>
/// <param name="position">Read position; stored as <c>position - 1</c>.</param>
/// <param name="cigar">Cigar to use; when null, a single match block covering the whole sequence is used.</param>
/// <param name="qualities">Per-base qualities; when null, every base gets <paramref name="qualityForAll"/>.</param>
/// <param name="matePosition">Mate position; stored as <c>matePosition - 1</c>.</param>
/// <param name="qualityForAll">Quality used for every base when <paramref name="qualities"/> is null.</param>
/// <returns>The constructed read.</returns>
public static Read CreateRead(string chr, string sequence, int position, CigarAlignment cigar = null, byte[] qualities = null, int matePosition = 0, byte qualityForAll = 30)
{
    var alignment = new BamAlignment();
    alignment.Bases = sequence;
    alignment.Position = position - 1;

    if (cigar != null)
    {
        alignment.CigarData = cigar;
    }
    else
    {
        alignment.CigarData = new CigarAlignment(sequence.Length + "M");
    }

    if (qualities != null)
    {
        alignment.Qualities = qualities;
    }
    else
    {
        alignment.Qualities = Enumerable.Repeat(qualityForAll, sequence.Length).ToArray();
    }

    alignment.MatePosition = matePosition - 1;

    return new Read(chr, alignment);
}
/// <summary>
/// Lays the expanded operations of both reads out on shared positions, keyed by offset from read 1's start.
/// Reference-spanning operations go into a position's mapped site; other operations go into the
/// unmapped prefix of the position that follows them.
/// </summary>
/// <param name="cigar1">Cigar of read 1; its operations start at offset 0.</param>
/// <param name="cigar2">Cigar of read 2; its operations start at offset <c>firstPos2 - firstPos1</c>.</param>
/// <param name="firstPos2">Position of read 2.</param>
/// <param name="firstPos1">Position of read 1.</param>
/// <returns>The stitched positions, ordered by offset.</returns>
private List<StitchedPosition> GetStitchedSites(CigarAlignment cigar1, CigarAlignment cigar2, long firstPos2, long firstPos1)
{
    var expandedCigar1 = cigar1.Expand();
    var expandedCigar2 = cigar2.Expand();

    var posDict = new Dictionary<int, StitchedPosition>();
    _positionsUsed = 0;

    // TryGetValue replaces the original ContainsKey + indexer sequence, which looked the key up
    // as many as three times per operation. A fresh position is still requested exactly once per new key.
    StitchedPosition site;

    var refPos = 0;
    foreach (var op in expandedCigar1)
    {
        if (!posDict.TryGetValue(refPos, out site))
        {
            site = GetFreshStitchedPosition();
            posDict[refPos] = site;
        }

        if (op.IsReferenceSpan())
        {
            site.MappedSite.R1Ops.Add(op);
            refPos++;
        }
        else
        {
            site.UnmappedPrefix.R1Ops.Add(op);
        }
    }

    // Restart at read 2's offset relative to read 1.
    refPos = (int)(firstPos2 - firstPos1);
    foreach (var op in expandedCigar2)
    {
        if (!posDict.TryGetValue(refPos, out site))
        {
            site = GetFreshStitchedPosition();
            posDict[refPos] = site;
        }

        if (op.IsReferenceSpan())
        {
            site.MappedSite.R2Ops.Add(op);
            refPos++;
        }
        else
        {
            site.UnmappedPrefix.R2Ops.Add(op);
        }
    }

    return posDict.OrderBy(x => x.Key).Select(x => x.Value).ToList();
}
/// <summary>
/// Expands a cigar into one length-1 operation per unit of length, preserving order.
/// </summary>
/// <param name="cigar">Cigar to expand.</param>
/// <returns>A list with <c>op.Length</c> single-length copies of each operation.</returns>
public static List<CigarOp> Expand(this CigarAlignment cigar)
{
    var singleBaseOps = new List<CigarOp>();

    foreach (CigarOp op in cigar)
    {
        var remaining = op.Length;
        while (remaining > 0)
        {
            singleBaseOps.Add(new CigarOp(op.Type, 1));
            remaining--;
        }
    }

    return singleBaseOps;
}
/// <summary>
/// Expands a cigar into one operation-type character per unit of length, preserving order.
/// </summary>
/// <param name="cigar">Cigar to expand.</param>
/// <returns>A new list holding each operation's type repeated <c>op.Length</c> times.</returns>
public static List<char> ExpandToChars(this CigarAlignment cigar)
{
    var opTypes = new List<char>();

    foreach (CigarOp op in cigar)
    {
        var remaining = op.Length;
        while (remaining > 0)
        {
            opTypes.Add(op.Type);
            remaining--;
        }
    }

    return opTypes;
}
/// <summary>
/// Expands a cigar into one operation-type character per unit of length, writing into a caller-supplied list.
/// </summary>
/// <param name="cigar">Cigar to expand.</param>
/// <param name="expandedCigar">List to reuse; it is cleared first and then filled in cigar order.</param>
public static void ExpandToChars(this CigarAlignment cigar, List<char> expandedCigar)
{
    expandedCigar.Clear();

    // The original also incremented a local counter ("recycleIndex") that was never read; it is removed.
    foreach (CigarOp op in cigar)
    {
        for (var i = 0; i < op.Length; ++i)
        {
            expandedCigar.Add(op.Type);
        }
    }
}
/// <summary>
/// Counts the insertion ('I') and deletion ('D') operations in a cigar.
/// </summary>
/// <param name="cigar">Cigar to inspect.</param>
/// <returns>The number of operations (not bases) whose type is 'I' or 'D'.</returns>
public static int NumIndels(this CigarAlignment cigar)
{
    var indelCount = 0;

    foreach (CigarOp op in cigar)
    {
        switch (op.Type)
        {
            case 'I':
            case 'D':
                indelCount++;
                break;
        }
    }

    return indelCount;
}
/// <summary>
/// Warns about a cigar that consists of a single indel, and rejects a non-empty cigar whose read span
/// differs from the read length.
/// </summary>
/// <param name="cigarData">Cigar to check.</param>
/// <param name="readLength">Expected read span of a non-empty cigar.</param>
/// <param name="readName">Read name included in the warning and error messages.</param>
/// <exception cref="InvalidDataException">Thrown when a non-empty cigar's read span does not equal <paramref name="readLength"/>.</exception>
private static void ValidateCigar(CigarAlignment cigarData, int readLength, string readName = "")
{
    if (cigarData.Count == 1)
    {
        var onlyOpType = cigarData[0].Type;
        if (onlyOpType == 'I' || onlyOpType == 'D')
        {
            // Deliberately a warning rather than an exception, to be more gentle to BWA-mem results.
            Logger.WriteWarningToLog("Anomalous alignment {0}. '{1}': indel without anchor", readName, cigarData);
        }
    }

    // An empty cigar is not checked against the read length.
    if (cigarData.Count <= 0 || cigarData.GetReadSpan() == readLength)
    {
        return;
    }

    throw new InvalidDataException(string.Format("Check alignment {0}. Invalid cigar '{1}': does not match length {2} of read", readName, cigarData, readLength));
}
/// <summary>
/// Appends each cigar operation to the buffer as one unsigned integer: the operation length shifted
/// left by <c>BamConstants.CigarShift</c>, OR-ed with the operation's numeric code.
/// </summary>
/// <param name="offset">Write offset into <paramref name="buffer"/>, passed by reference to <c>BinaryIO.AddUIntBytes</c>.</param>
/// <param name="buffer">Destination buffer.</param>
/// <param name="cigarOps">Operations to pack, in order.</param>
/// <exception cref="ApplicationException">Thrown when an operation type maps to <c>BamConstants.LutError</c>.</exception>
static private void PackCigar(ref int offset, ref byte[] buffer, CigarAlignment cigarOps)
{
    foreach (CigarOp op in cigarOps)
    {
        // Translate the operation character to its numeric code via the lookup table.
        uint opCode = ConversionHelper.CigarOpToNumber[op.Type];
        var isUnknownOp = opCode == BamConstants.LutError;

        if (isUnknownOp)
        {
            throw new ApplicationException(
                string.Format("ERROR: Encountered an unexpected CIGAR operation ({0}).", op.Type));
        }

        var packed = (op.Length << BamConstants.CigarShift) | opCode;
        BinaryIO.AddUIntBytes(ref buffer, ref offset, packed);
    }
}