/// <summary>
/// Reports whether a soft-clip ('S') operation is present in the portion of the CIGAR that
/// remains once a leading and/or trailing clip operation has been set aside.
/// </summary>
/// <param name="cigar">The CIGAR to inspect.</param>
/// <returns>
/// <c>true</c> if any operation in the inspected sub-CIGAR has type 'S'; otherwise <c>false</c>.
/// </returns>
public static bool HasInternalSoftclip(this CigarAlignment cigar)
{
    // One operation is skipped at the front when the CIGAR reports a prefix clip.
    var firstOpIndex = 0;
    if (cigar.GetPrefixClip() > 0)
    {
        firstOpIndex = 1;
    }

    // Likewise one operation is dropped from the back when there is a suffix clip.
    var trailingOpsToSkip = 0;
    if (cigar.GetSuffixClip() > 0)
    {
        trailingOpsToSkip = 1;
    }

    // NOTE(review): the second argument is passed as "Count minus skipped ops"; whether
    // GetSubCigar treats it as an exclusive end index is not visible here - confirm.
    var interior = cigar.GetSubCigar(firstOpIndex, cigar.Count - trailingOpsToSkip);

    var foundSoftclip = false;
    foreach (CigarOp interiorOp in interior)
    {
        if (interiorOp.Type == 'S')
        {
            foundSoftclip = true;
            break;
        }
    }

    return foundSoftclip;
}
/// <summary>
/// Walks the CIGAR operations of a read against the reference and tallies matches, mismatches,
/// soft-clipped bases and indels into an <see cref="AlignmentSummary"/>.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> at which the aligned (non-clipped) part of the read starts.
/// </param>
/// <param name="cigarData">The CIGAR describing the alignment; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases that the read is compared against.</param>
/// <param name="readSequence">Read bases, indexed in CIGAR read order.</param>
/// <param name="trackActualMismatches">
/// When true, every mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, soft-clipped bases are compared to the reference as if they were aligned, and
/// contribute to <c>NumMismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="probeSoftclipPrefix">Not read by this method.</param>
/// <param name="probeSoftclipSuffix">Not read by this method.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation would extend past the end of
/// <paramref name="refSequence"/>.
/// </returns>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        // Soft-clipped bases will be compared too, so back the reference cursor up to where
        // the prefix clip would start.
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // run of non-mismatching 'M' bases before the first mismatch/indel
    var endAnchorLength = 0;   // run of non-mismatching 'M' bases since the last mismatch/indel
    var hasHitNonMatch = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)operation.Length;

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var clippedBase = readSequence[startIndexInRead + i];
                    if (clippedBase != 'N')
                    {
                        summary.NumNonNSoftclips++;

                        if (checkSoftclipsForMismatches)
                        {
                            var refIndex = startIndexInReference + i;
                            if (refIndex < 0 || refIndex >= refSequence.Length)
                            {
                                // A clipped base hanging off either end of the reference counts as a mismatch.
                                summary.NumMismatchesIncludeSoftclip++;
                            }
                            else if (clippedBase != refSequence[refIndex])
                            {
                                summary.NumMismatchesIncludeSoftclip++;

                                if (trackActualMismatches)
                                {
                                    if (summary.MismatchesIncludeSoftclip == null)
                                    {
                                        summary.MismatchesIncludeSoftclip = new List<string> { };
                                    }

                                    var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], clippedBase);
                                    summary.MismatchesIncludeSoftclip.Add(mismatch);
                                }
                            }
                        }
                    }
                }
                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;
                    if (refIndex > refSequence.Length - 1)
                    {
                        // The read runs off the end of the reference: signalled with null, not an exception.
                        return null;
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;

                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string> { };
                            }

                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // 'N' read bases are neither matches nor mismatches, but they do extend the anchors.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }
                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            // Soft-clips were compared against the reference above, so the reference cursor moves past them too.
            startIndexInReference += opLength;
        }
    }

    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}
/// <summary>
/// Builds the stitched sites for a read pair and reconciles them into a <see cref="StitchingInfo"/>.
/// </summary>
/// <param name="cigar1">CIGAR of the first read.</param>
/// <param name="pos1">Position of the first read.</param>
/// <param name="cigar2">CIGAR of the second read.</param>
/// <param name="pos2">Position of the second read.</param>
/// <param name="reverseFirst">Forwarded unchanged to <c>ReconcileSites</c>.</param>
/// <param name="pairIsOutie">
/// Selects which read supplies the prefix clip and the clipped read end used for reconciliation,
/// and is also forwarded to <c>ReconcileSites</c>.
/// </param>
/// <returns>The reconciled stitching info, or <c>null</c> if reconciliation reported failure.</returns>
public StitchingInfo GetStitchedCigar(CigarAlignment cigar1, int pos1, CigarAlignment cigar2, int pos2, bool reverseFirst, bool pairIsOutie)
{
    // NOTE(review): pos2 is passed before pos1 here; confirm against GetStitchedSites' parameter order.
    var sites = GetStitchedSites(cigar1, cigar2, pos2, pos1);

    // For an "outie" pair the roles swap: read 2 provides the prefix clip and read 1 the suffix side.
    CigarAlignment prefixSource;
    CigarAlignment suffixSource;
    if (pairIsOutie)
    {
        prefixSource = cigar2;
        suffixSource = cigar1;
    }
    else
    {
        prefixSource = cigar1;
        suffixSource = cigar2;
    }

    var prefixClipLength = (int)prefixSource.GetPrefixClip();
    var readSpanBeforeSuffixClip = (int)(suffixSource.GetReadSpan() - (int)suffixSource.GetSuffixClip());

    bool reconciled;
    var reconciledInfo = ReconcileSites(sites, reverseFirst, out reconciled, prefixClipLength, readSpanBeforeSuffixClip, pairIsOutie);

    if (!reconciled)
    {
        return null;
    }

    return reconciledInfo;
}
/// <summary>
/// Log result info to a result file. Doesn't directly impact test, just useful for looking at the results all together.
/// (Also useful for a deliverable output summary).
/// Two files are appended to: <paramref name="resultFile"/> itself (one CSV row per scenario) and
/// <paramref name="resultFile"/> + ".visuals.csv" (a per-base diagram of the input and stitched reads).
/// Each file gets a header row the first time it is created.
/// </summary>
/// <param name="resultFile">Path of the CSV results file.</param>
/// <param name="scenario">The scenario that was run, including its input reads and expected output read.</param>
/// <param name="didStitch">Whether stitching actually happened for this scenario.</param>
/// <param name="resultSet">The alignment set produced by the run; may be null.</param>
/// <param name="message">Optional free-text message written as the last column of the results row.</param>
/// <exception cref="ArgumentException">
/// Thrown when either input read's position minus its prefix clip would start before the first base.
/// </exception>
private void LogResult(string resultFile, StitchingScenario scenario, bool didStitch, AlignmentSet resultSet = null, string message = null)
{
    const int diagramLength = 12;

    // This is useful for looking at the results across the full test set.
    const string delimiter = ",";

    // Look the stitched read up once; both output files report on the same read.
    Read stitchedRead = null;
    if (resultSet != null)
    {
        stitchedRead = resultSet.ReadsForProcessing.FirstOrDefault();
    }

    var hasStitchedDirections = stitchedRead != null && stitchedRead.CigarDirections != null;

    var visualResultsFile = resultFile + ".visuals.csv";

    if (!File.Exists(visualResultsFile))
    {
        using (var sw = File.CreateText(visualResultsFile))
        {
            var leftOfDiagram = new List<string>() { "ID", "Pos", "Cigar", "Dirs", "Diagram Var" };
            var varDiagram = Enumerable.Repeat("", diagramLength);
            var leftOfRef = new List<string>() { "Pos", "Cigar", "Dirs", "Diagram Ref" };
            var refDiagram = Enumerable.Repeat("", diagramLength);
            var leftOfStitched = new List<string>() { "Pos", "Cigar", "Dirs", "Diagram Stitched" };
            var stitchedDiagram = Enumerable.Repeat("", diagramLength);

            sw.WriteLine(string.Join(delimiter,
                leftOfDiagram.
                    Concat(varDiagram).
                    Concat(leftOfRef).
                    Concat(refDiagram).
                    Concat(leftOfStitched).
                    Concat(stitchedDiagram)));
        }
    }

    using (var sw = File.AppendText(visualResultsFile))
    {
        // First row
        var leftOfDiagram = new List<string>()
        {
            scenario.Category + "-" + scenario.Id,
            scenario.InputRead1.Position.ToString(),
            scenario.InputRead1.Cigar,
            scenario.InputRead1.Directions
        };

        var r1Cigar = new CigarAlignment(scenario.InputRead1.Cigar);
        var r2Cigar = new CigarAlignment(scenario.InputRead2.Cigar);

        // Zero-based column at which each read's first base (including any prefix clip) is drawn.
        var r1BasesStart = scenario.InputRead1.Position - 1 - (int)r1Cigar.GetPrefixClip();
        var r2BasesStart = scenario.InputRead2.Position - 1 - (int)r2Cigar.GetPrefixClip();

        if (r1BasesStart < 0 || r2BasesStart < 0)
        {
            throw new ArgumentException("Test scenario has invalid position/cigar combination: " +
                scenario.InputRead1.Position + ":" + scenario.InputRead1.Cigar + " or " +
                scenario.InputRead2.Position + ":" + scenario.InputRead2.Cigar);
        }

        var r2CigarLength = 0;
        foreach (CigarOp op in r2Cigar)
        {
            r2CigarLength += (int)op.Length;
        }

        var r2BasesEnd = r2BasesStart + r2CigarLength;

        // Insertions in read 1 before the overlap shift where read 2 is drawn.
        var preOverlapCigar = new CigarAlignment(scenario.InputRead1.Cigar).GetClippedCigar(0, (int)(r2BasesStart - r1BasesStart) + 1, includeWholeEndIns: true);
        var insertionsPreOverlap = preOverlapCigar.CountOperations('I');
        var expectedReadLength = r2BasesEnd - r1BasesStart + insertionsPreOverlap;
        r2BasesStart = r2BasesStart + insertionsPreOverlap;

        var varDiagram = Enumerable.Repeat("", r1BasesStart)
            .Concat(ExpandCigar(r1Cigar, new CigarDirection(scenario.InputRead1.Directions)))
            .ToList();

        // Pad out to the diagram width. Enumerable.Repeat throws on a negative count, so a read
        // that is wider than the diagram simply gets no padding instead of failing the log call.
        varDiagram.AddRange(Enumerable.Repeat("", Math.Max(0, diagramLength - varDiagram.Count)));

        var leftOfRef = new List<string>()
        {
            scenario.InputRead2.Position.ToString(),
            scenario.InputRead2.Cigar,
            scenario.InputRead2.Directions
        };
        var refDiagram = Enumerable.Repeat("", diagramLength);

        var leftOfStitched = Enumerable.Repeat("", 3).ToList();
        var stitchedDiagram = Enumerable.Repeat("NA", diagramLength);

        if (hasStitchedDirections)
        {
            var stitchedBasesStart = stitchedRead.Position - 1 - (int)stitchedRead.CigarData.GetPrefixClip();

            leftOfStitched = new List<string>()
            {
                stitchedRead.Position.ToString(),
                stitchedRead.CigarData.ToString(),
                GetDirectionsString(stitchedRead)
            };

            // Clamp the offset: an unexpected stitched position should still be logged, not crash the logger.
            stitchedDiagram = Enumerable.Repeat("", Math.Max(0, stitchedBasesStart))
                .Concat(ExpandCigar(stitchedRead.CigarData, stitchedRead.CigarDirections));
        }

        sw.WriteLine(string.Join(delimiter,
            leftOfDiagram.
                Concat(varDiagram).
                Concat(leftOfRef).
                Concat(refDiagram).
                Concat(leftOfStitched).
                Concat(stitchedDiagram)));

        // Second row
        var varDiagramR2 = Enumerable.Repeat("", r2BasesStart)
            .Concat(ExpandCigar(r2Cigar, new CigarDirection(scenario.InputRead2.Directions)))
            .ToList();
        varDiagramR2.AddRange(Enumerable.Repeat("", Math.Max(0, diagramLength - varDiagramR2.Count)));

        var leftOfDiagramPad = new List<string>()
        {
            "",
            scenario.InputRead2.Position.ToString(),
            scenario.InputRead2.Cigar,
            scenario.InputRead2.Directions
        };
        var leftOfRefPad = Enumerable.Repeat("", leftOfRef.Count);
        var leftOfStitchedPad = Enumerable.Repeat("", leftOfStitched.Count);
        var refDiagramR2 = Enumerable.Repeat("", diagramLength);

        // A row of '+' marking the span the stitched read is expected to cover.
        var totalBasesCovered = Enumerable.Repeat("", r1BasesStart)
            .Concat(Enumerable.Repeat("+", Math.Max(0, expectedReadLength)))
            .ToList();

        sw.WriteLine(string.Join(delimiter,
            leftOfDiagramPad.
                Concat(varDiagramR2).
                Concat(leftOfRefPad).
                Concat(refDiagramR2).
                Concat(leftOfStitchedPad).
                Concat(totalBasesCovered)));

        sw.WriteLine();
    }

    if (!File.Exists(resultFile))
    {
        // Create a file to write to, and write the header.
        using (var sw = File.CreateText(resultFile))
        {
            sw.WriteLine(string.Join(delimiter, new[]
            {
                "ID", "R1_Pos", "R1_Cigar", "R1_Dirs", "R2_Pos", "R2_Cigar", "R2_Dirs",
                "ShouldStitch", "DidStitch",
                "Exp_SR_Pos", "Exp_SR_Cigar", "Exp_SR_Dirs",
                "Actual_SR_Pos", "Actual_SR_Cigar", "Actual_SR_Dirs",
                "Notes", "Pass", "Message"
            }));
        }
    }

    using (var sw = File.AppendText(resultFile))
    {
        // Add everything we know from the input scenario, and whether it did stitch.
        var fields = new List<string>()
        {
            scenario.Category + "-" + scenario.Id,
            scenario.InputRead1.Position.ToString(),
            scenario.InputRead1.Cigar,
            scenario.InputRead1.Directions,
            scenario.InputRead2.Position.ToString(),
            scenario.InputRead2.Cigar,
            scenario.InputRead2.Directions,
            scenario.ShouldStitch.ToString(),
            didStitch.ToString(),
            scenario.OutputRead1.Position.ToString(),
            scenario.OutputRead1.Cigar,
            scenario.OutputRead1.Directions,
        };

        var stitchResultsMatch = scenario.ShouldStitch == didStitch;
        var cigarResultsMatch = false;
        var directionResultsMatch = false;

        // Add the info from the output reads
        if (hasStitchedDirections)
        {
            var directions = GetDirectionsString(stitchedRead);
            fields.AddRange(new List<string>()
            {
                stitchedRead.Position.ToString(),
                stitchedRead.CigarData.ToString(),
                directions
            });

            // The cigar/direction comparisons only matter when the scenario expects a stitch.
            cigarResultsMatch = !scenario.ShouldStitch || OutputCigarsMatch(scenario, resultSet);
            directionResultsMatch = !scenario.ShouldStitch || OutputDirectionsMatch(scenario, resultSet);
        }
        else
        {
            fields.AddRange(new List<string>() { "", "", "" });
        }

        // Determine if this scenario "Passed" (i.e. matched expectations). (TODO if the resultSet is null, it failed -- is that valid?)
        var testResult = (!scenario.ShouldStitch && stitchResultsMatch) ||
                         (stitchResultsMatch && cigarResultsMatch && directionResultsMatch);

        fields.Add(Sanitize(scenario.Notes, Convert.ToChar(delimiter)));
        fields.Add(testResult.ToString());
        fields.Add(message);

        // Write scenario results to file
        sw.WriteLine(string.Join(delimiter, fields));
    }
}
/// <summary>
/// Returns a sub-CIGAR with one clip operation set aside: the leading one for read 1,
/// the trailing one for any other read. When the relevant clip length is zero the full
/// operation range is requested.
/// </summary>
/// <param name="cigar">The CIGAR to trim.</param>
/// <param name="isRead1">
/// <c>true</c> to drop a prefix clip; <c>false</c> to drop a suffix clip.
/// </param>
/// <returns>The result of <c>GetSubCigar</c> over the selected operation range.</returns>
public static CigarAlignment GetCigarWithoutProbeClips(this CigarAlignment cigar, bool isRead1)
{
    if (isRead1)
    {
        // Skip the first operation only when there actually is a prefix clip.
        var firstOpIndex = 0;
        if (cigar.GetPrefixClip() > 0)
        {
            firstOpIndex = 1;
        }

        return cigar.GetSubCigar(firstOpIndex, cigar.Count);
    }

    // Otherwise stop one operation short of the end when there is a suffix clip.
    var endOpIndex = cigar.Count;
    if (cigar.GetSuffixClip() > 0)
    {
        endOpIndex = endOpIndex - 1;
    }

    return cigar.GetSubCigar(0, endOpIndex);
}
/// <summary>
/// Computes the read span of the CIGAR with its prefix and suffix clip lengths subtracted.
/// </summary>
/// <param name="cigar">The CIGAR to measure.</param>
/// <returns>Read span minus prefix clip minus suffix clip.</returns>
public static uint GetReadSpanBetweenClippedEnds(this CigarAlignment cigar)
{
    var fullReadSpan = cigar.GetReadSpan();
    var leadingClip = cigar.GetPrefixClip();
    var trailingClip = cigar.GetSuffixClip();

    // NOTE(review): the subtraction is on unsigned values; presumably the clips never exceed
    // the read span - confirm against how GetReadSpan counts clipped bases.
    var spanWithoutLeadingClip = fullReadSpan - leadingClip;
    return spanWithoutLeadingClip - trailingClip;
}
/// <summary>
/// Walks the CIGAR operations of a read against the reference and tallies matches, mismatches,
/// soft-clipped bases and indels into an <see cref="AlignmentSummary"/>.
/// </summary>
/// <param name="startIndexInReference">
/// Index into <paramref name="refSequence"/> at which the aligned (non-clipped) part of the read starts.
/// </param>
/// <param name="cigarData">The CIGAR describing the alignment; also stored on the returned summary.</param>
/// <param name="refSequence">Reference bases that the read is compared against.</param>
/// <param name="readSequence">Read bases, indexed in CIGAR read order.</param>
/// <param name="trackActualMismatches">
/// When true, every mismatch is also recorded as a "refIndex_refBase_readBase" string in
/// <c>MismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="checkSoftclipsForMismatches">
/// When true, soft-clipped bases are compared to the reference as if they were aligned, and
/// contribute to <c>NumMismatchesIncludeSoftclip</c>.
/// </param>
/// <param name="probeSoftclipPrefix">Not read by this method.</param>
/// <param name="probeSoftclipSuffix">Not read by this method.</param>
/// <returns>
/// The populated summary, or <c>null</c> when an 'M' operation would extend past the end of
/// <paramref name="refSequence"/>.
/// </returns>
/// <exception cref="InvalidDataException">
/// Thrown when an 'M' operation would cover a negative reference index.
/// </exception>
public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
{
    var summary = new AlignmentSummary();
    summary.Cigar = cigarData;

    if (checkSoftclipsForMismatches)
    {
        // Soft-clipped bases will be compared too, so back the reference cursor up to where
        // the prefix clip would start.
        startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
    }

    var startIndexInRead = 0;
    var anchorLength = 0;      // run of non-mismatching 'M' bases before the first mismatch/indel
    var endAnchorLength = 0;   // run of non-mismatching 'M' bases since the last mismatch/indel
    var hasHitNonMatch = false;
    var hasHitNonNSoftclip = false;

    for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
    {
        var operation = cigarData[cigarOpIndex];
        var opLength = (int)(operation.Length);

        switch (operation.Type)
        {
            case 'S': // soft-clip
                for (var i = 0; i < opLength; i++)
                {
                    summary.NumSoftclips++;

                    var clippedBase = readSequence[startIndexInRead + i];

                    // No special treatment for Ns that are inside the softclip. Because the whole
                    // N-softclip distinction was meant to deal with padding-type softclips, I think.
                    // Once any non-N clipped base has been seen, every later clipped base (N or not,
                    // in this or a later soft-clip operation) is counted as non-N.
                    if (clippedBase != 'N' || hasHitNonNSoftclip)
                    {
                        hasHitNonNSoftclip = true;
                        summary.NumNonNSoftclips++;

                        if (checkSoftclipsForMismatches)
                        {
                            var refIndex = startIndexInReference + i;
                            if (refIndex < 0 || refIndex >= refSequence.Length)
                            {
                                // A clipped base hanging off either end of the reference counts as a mismatch.
                                summary.NumMismatchesIncludeSoftclip++;
                            }
                            else if (clippedBase != refSequence[refIndex] && clippedBase != 'N')
                            {
                                summary.NumMismatchesIncludeSoftclip++;

                                if (trackActualMismatches)
                                {
                                    if (summary.MismatchesIncludeSoftclip == null)
                                    {
                                        summary.MismatchesIncludeSoftclip = new List<string> { };
                                    }

                                    // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                                    var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], clippedBase);
                                    summary.MismatchesIncludeSoftclip.Add(mismatch);
                                }
                            }
                        }
                    }
                }
                break;

            case 'M': // match or mismatch
                for (var i = 0; i < opLength; i++)
                {
                    var refIndex = startIndexInReference + i;
                    if (refIndex > refSequence.Length - 1)
                    {
                        // The read runs off the end of the reference: signalled with null, not an exception.
                        return null;
                    }

                    if (refIndex < 0)
                    {
                        throw new InvalidDataException(
                            "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                            cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                    }

                    var baseAtIndex = readSequence[startIndexInRead + i];
                    if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                    {
                        summary.NumMismatches++;
                        summary.NumMismatchesIncludeSoftclip++;

                        if (trackActualMismatches)
                        {
                            if (summary.MismatchesIncludeSoftclip == null)
                            {
                                summary.MismatchesIncludeSoftclip = new List<string> { };
                            }

                            // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                            var mismatch = string.Format("{0}_{1}_{2}", refIndex, refSequence[refIndex], baseAtIndex);
                            summary.MismatchesIncludeSoftclip.Add(mismatch);
                        }

                        hasHitNonMatch = true;
                        endAnchorLength = 0;
                    }
                    else
                    {
                        // 'N' read bases are neither matches nor mismatches, but they do extend the anchors.
                        if (baseAtIndex != 'N')
                        {
                            summary.NumMatches++;
                        }

                        if (!hasHitNonMatch)
                        {
                            anchorLength++;
                        }

                        endAnchorLength++;
                    }
                }
                break;

            case 'I': // insertion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumInsertedBases += opLength;
                break;

            case 'D': // deletion
                hasHitNonMatch = true;
                endAnchorLength = 0;
                summary.NumIndels++;
                summary.NumIndelBases += opLength;
                summary.NumDeletedBases += opLength;
                break;
        }

        if (operation.IsReadSpan())
        {
            startIndexInRead += opLength;
        }

        if (operation.IsReferenceSpan())
        {
            startIndexInReference += opLength;
        }

        if (checkSoftclipsForMismatches && operation.Type == 'S')
        {
            // Soft-clips were compared against the reference above, so the reference cursor moves past them too.
            startIndexInReference += opLength;
        }
    }

    summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);
    return summary;
}