Beispiel #1
0
        /// <summary>
        /// Reports whether the cigar contains a soft-clip ('S') operation in its inner portion,
        /// i.e. after skipping the first operation when <c>GetPrefixClip()</c> is non-zero and
        /// the last operation when <c>GetSuffixClip()</c> is non-zero.
        /// </summary>
        /// <param name="cigar">The alignment to inspect.</param>
        /// <returns>true if any inner operation has type 'S'; otherwise false.</returns>
        public static bool HasInternalSoftclip(this CigarAlignment cigar)
        {
            // Bounds of the inner cigar: drop one operation at each end that carries a clip.
            var innerStart = cigar.GetPrefixClip() > 0 ? 1 : 0;
            var innerEnd   = cigar.Count - (cigar.GetSuffixClip() > 0 ? 1 : 0);
            var innerOps   = cigar.GetSubCigar(innerStart, innerEnd);

            var foundSoftclip = false;
            foreach (CigarOp candidate in innerOps)
            {
                if (candidate.Type == 'S')
                {
                    foundSoftclip = true;
                    break;
                }
            }

            return foundSoftclip;
        }
Beispiel #2
0
        /// <summary>
        /// Walks the cigar against the reference and read sequences and tallies matches, mismatches,
        /// indels and soft-clips into a new <see cref="AlignmentSummary"/>.
        /// </summary>
        /// <param name="startIndexInReference">Index into <paramref name="refSequence"/> at which the aligned
        /// (non-clipped) part of the read starts. When <paramref name="checkSoftclipsForMismatches"/> is true
        /// it is first shifted left by the cigar's prefix clip length.</param>
        /// <param name="cigarData">Cigar operations to walk; also stored on the returned summary.</param>
        /// <param name="refSequence">Reference bases, indexed by reference position.</param>
        /// <param name="readSequence">Read bases, indexed by read position.</param>
        /// <param name="trackActualMismatches">When true, every counted mismatch that lies inside the reference
        /// is also recorded as a "refIndex_refBase_readBase" string in <c>MismatchesIncludeSoftclip</c>.</param>
        /// <param name="checkSoftclipsForMismatches">When true, non-N soft-clipped bases are compared against
        /// the reference and counted in <c>NumMismatchesIncludeSoftclip</c>.</param>
        /// <param name="probeSoftclipPrefix">Not read by this method.</param>
        /// <param name="probeSoftclipSuffix">Not read by this method.</param>
        /// <returns>
        /// The populated summary, or null when an 'M' operation extends past the end of
        /// <paramref name="refSequence"/>.
        /// </returns>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                // Back up so the walk starts on the first soft-clipped base instead of the first aligned base.
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead = 0;
            var anchorLength     = 0;     // M bases (match or N) seen before the first mismatch/indel
            var endAnchorLength  = 0;     // M bases (match or N) seen since the most recent mismatch/indel
            var hasHitNonMatch   = false;

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)operation.Length;

                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        summary.NumSoftclips++;

                        var clippedBase = readSequence[startIndexInRead + i];
                        if (clippedBase == 'N')
                        {
                            continue;
                        }

                        summary.NumNonNSoftclips++;

                        if (!checkSoftclipsForMismatches)
                        {
                            continue;
                        }

                        var refIndex = startIndexInReference + i;
                        if (refIndex < 0 || refIndex >= refSequence.Length)
                        {
                            // A clipped base hanging off either end of the reference is counted as a
                            // mismatch, but there is no reference base to record for it.
                            summary.NumMismatchesIncludeSoftclip++;
                        }
                        else if (clippedBase != refSequence[refIndex])
                        {
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                var mismatch = string.Format("{0}_{1}_{2}",
                                                             refIndex,
                                                             refSequence[refIndex],
                                                             clippedBase);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        var refIndex = startIndexInReference + i;
                        if (refIndex > refSequence.Length - 1)
                        {
                            // Read goes off the end of the reference: signalled to the caller with null.
                            return null;
                        }

                        // NOTE(review): a negative refIndex is not guarded here; a non-N base at such a
                        // position fails on the refSequence indexer below. Confirm whether callers can pass one.
                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                var mismatch = string.Format("{0}_{1}_{2}",
                                                             refIndex,
                                                             refSequence[refIndex],
                                                             baseAtIndex);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // N bases are neither matches nor mismatches, but they do not break an anchor.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }

                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    // Soft-clips are walked along the reference too when they are being mismatch-checked.
                    startIndexInReference += opLength;
                }
            }

            // The anchor is the shorter of the leading and trailing clean stretches.
            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return summary;
        }
Beispiel #3
0
        /// <summary>
        /// Computes the stitched sites for a read pair via <c>GetStitchedSites</c> and reconciles them
        /// via <c>ReconcileSites</c>.
        /// </summary>
        /// <param name="cigar1">Cigar of the first read.</param>
        /// <param name="pos1">Position of the first read.</param>
        /// <param name="cigar2">Cigar of the second read.</param>
        /// <param name="pos2">Position of the second read.</param>
        /// <param name="reverseFirst">Forwarded unchanged to <c>ReconcileSites</c>.</param>
        /// <param name="pairIsOutie">Selects which read supplies the prefix clip and which supplies the
        /// span-before-suffix-clip passed to <c>ReconcileSites</c>; also forwarded to it.</param>
        /// <returns>The reconciled stitching info, or null when reconciliation reports failure.</returns>
        public StitchingInfo GetStitchedCigar(CigarAlignment cigar1, int pos1, CigarAlignment cigar2, int pos2, bool reverseFirst, bool pairIsOutie)
        {
            var sites = GetStitchedSites(cigar1, cigar2, pos2, pos1);

            // When pairIsOutie is set, read 2 supplies the prefix clip and read 1 the suffix side; otherwise the reverse.
            var prefixSource = pairIsOutie ? cigar2 : cigar1;
            var suffixSource = pairIsOutie ? cigar1 : cigar2;

            var prefixClip           = (int)prefixSource.GetPrefixClip();
            var spanBeforeSuffixClip = (int)(suffixSource.GetReadSpan() - (int)suffixSource.GetSuffixClip());

            bool reconciled;
            var  reconciledInfo = ReconcileSites(sites, reverseFirst, out reconciled, prefixClip, spanBeforeSuffixClip, pairIsOutie);

            if (!reconciled)
            {
                return null;
            }

            return reconciledInfo;
        }
Beispiel #4
0
        /// <summary>
        /// Log result info to a result file. Doesn't directly impact test, just useful for looking at the results all together.
        /// (Also useful for a deliverable output summary).
        /// Two files are appended to: <paramref name="resultFile"/> itself (one CSV row per scenario) and
        /// <paramref name="resultFile"/> + ".visuals.csv" (diagram rows for the input reads and the stitched read).
        /// Each file gets a header row when it does not exist yet.
        /// </summary>
        /// <param name="resultFile">Path of the CSV summary file; the visuals file name is derived from it.</param>
        /// <param name="scenario">The stitching scenario that was run (input reads and expected output read).</param>
        /// <param name="didStitch">Whether stitching actually happened for this scenario.</param>
        /// <param name="resultSet">The alignment set produced for the scenario, or null if there is none.</param>
        /// <param name="message">Optional text written as the last column of the summary row.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when, for either input read, Position - 1 - prefix clip is negative.
        /// </exception>
        private void LogResult(string resultFile, StitchingScenario scenario, bool didStitch, AlignmentSet resultSet = null, string message = null)
        {
            // Number of diagram columns each diagram is padded out to.
            const int diagramLength = 12;
            // This is useful for looking at the results across the full test set.

            const string delimiter = ",";

            var visualResultsFile = resultFile + ".visuals.csv";

            if (!File.Exists(visualResultsFile))
            {
                // First use: write the header row, with empty cells standing in for the diagram columns.
                using (var sw = File.CreateText(visualResultsFile))
                {
                    var leftOfDiagram = new List <string>()
                    {
                        "ID", "Pos", "Cigar", "Dirs", "Diagram Var"
                    };
                    var varDiagram = Enumerable.Repeat("", diagramLength);
                    var leftOfRef  = new List <string>()
                    {
                        "Pos", "Cigar", "Dirs", "Diagram Ref"
                    };
                    var refDiagram     = Enumerable.Repeat("", diagramLength);
                    var leftOfStitched = new List <string>()
                    {
                        "Pos", "Cigar", "Dirs", "Diagram Stitched"
                    };
                    var stitchedDiagram = Enumerable.Repeat("", diagramLength);
                    sw.WriteLine(string.Join(delimiter,
                                             leftOfDiagram.
                                             Concat(varDiagram).
                                             Concat(leftOfRef).
                                             Concat(refDiagram).
                                             Concat(leftOfStitched).
                                             Concat(stitchedDiagram)));
                }
            }


            using (var sw = File.AppendText(visualResultsFile))
            {
                Read stitchedRead = null;
                if (resultSet != null && resultSet.ReadsForProcessing.Any())
                {
                    stitchedRead = resultSet.ReadsForProcessing.First();
                }

                // First row
                var leftOfDiagram = new List <string>()
                {
                    scenario.Category + "-" + scenario.Id,
                    scenario.InputRead1.Position.ToString(), scenario.InputRead1.Cigar, scenario.InputRead1.Directions
                };
                var r1Cigar      = new CigarAlignment(scenario.InputRead1.Cigar);
                var r2Cigar      = new CigarAlignment(scenario.InputRead2.Cigar);
                // Zero-based column of each read's first base, counting its prefix clip.
                var r1BasesStart = scenario.InputRead1.Position - 1 - (int)r1Cigar.GetPrefixClip();
                var r2BasesStart = scenario.InputRead2.Position - 1 - (int)r2Cigar.GetPrefixClip();

                // Both starts are known to be non-negative past this point, so no clamping is needed.
                if (r1BasesStart < 0 || r2BasesStart < 0)
                {
                    throw new ArgumentException("Test scenario has invalid position/cigar combination: " + scenario.InputRead1.Position + ":" + scenario.InputRead1.Cigar + " or " + scenario.InputRead2.Position + ":" + scenario.InputRead2.Cigar);
                }

                var r2CigarLength = 0;
                foreach (CigarOp op in r2Cigar)
                {
                    r2CigarLength += (int)op.Length;
                }

                var r2BasesEnd = r2BasesStart + r2CigarLength;

                var preOverlapCigar      = new CigarAlignment(scenario.InputRead1.Cigar).GetClippedCigar(0, (int)(r2BasesStart - r1BasesStart) + 1, includeWholeEndIns: true);
                var insertionsPreOverlap = preOverlapCigar.CountOperations('I');

                var expectedReadLength = r2BasesEnd - r1BasesStart + insertionsPreOverlap;
                // Shift read 2 right by the insertions counted in read 1 before the overlap.
                r2BasesStart = r2BasesStart + insertionsPreOverlap;

                var varDiagram = Enumerable.Repeat("", r1BasesStart).Concat(ExpandCigar(r1Cigar,
                                                                                        new CigarDirection(scenario.InputRead1.Directions))).ToList();
                // Pad out to diagramLength; a diagram that is already wider gets no padding
                // (Enumerable.Repeat rejects a negative count).
                varDiagram = varDiagram.Concat(Enumerable.Repeat("", Math.Max(0, diagramLength - varDiagram.Count))).ToList();
                var leftOfRef = new List <string>()
                {
                    scenario.InputRead2.Position.ToString(), scenario.InputRead2.Cigar, scenario.InputRead2.Directions
                };
                var refDiagram      = Enumerable.Repeat("", diagramLength);
                var leftOfStitched  = Enumerable.Repeat("", 3).ToList();
                var stitchedDiagram = Enumerable.Repeat("NA", diagramLength);
                if (stitchedRead != null && stitchedRead.CigarDirections != null)
                {
                    var stitchedBasesStart = stitchedRead.Position - 1 - (int)stitchedRead.CigarData.GetPrefixClip();
                    leftOfStitched = new List <string>()
                    {
                        stitchedRead.Position.ToString(), stitchedRead.CigarData.ToString(), GetDirectionsString(stitchedRead)
                    };
                    stitchedDiagram = Enumerable.Repeat("", stitchedBasesStart).Concat(ExpandCigar(stitchedRead.CigarData,
                                                                                                   stitchedRead.CigarDirections));
                }

                sw.WriteLine(string.Join(delimiter,
                                         leftOfDiagram.
                                         Concat(varDiagram).
                                         Concat(leftOfRef).
                                         Concat(refDiagram).
                                         Concat(leftOfStitched).
                                         Concat(stitchedDiagram)));

                // Second row
                // Materialize once so the cell count and the written cells come from a single enumeration.
                var r2Cells =
                    Enumerable.Repeat("", r2BasesStart)
                    .Concat(ExpandCigar(r2Cigar,
                                        new CigarDirection(scenario.InputRead2.Directions)))
                    .ToList();

                var varDiagramR2 = r2Cells.Concat(Enumerable.Repeat("", Math.Max(0, diagramLength - r2Cells.Count))).ToList();
                var leftOfDiagramPad = new List <string>()
                {
                    "",
                    scenario.InputRead2.Position.ToString(),
                    scenario.InputRead2.Cigar,
                    scenario.InputRead2.Directions
                };

                var leftOfRefPad      = Enumerable.Repeat("", leftOfRef.Count);
                var leftOfStitchedPad = Enumerable.Repeat("", leftOfStitched.Count);
                var refDiagramR2      = Enumerable.Repeat("", diagramLength);

                // One "+" per expected column, written under the stitched diagram.
                var totalBasesCovered =
                    Enumerable.Repeat("", r1BasesStart).Concat(Enumerable.Repeat("+", expectedReadLength)).ToList();
                sw.WriteLine(string.Join(delimiter,
                                         leftOfDiagramPad.
                                         Concat(varDiagramR2).
                                         Concat(leftOfRefPad).
                                         Concat(refDiagramR2).
                                         Concat(leftOfStitchedPad).
                                         Concat(totalBasesCovered)
                                         ));

                sw.WriteLine();
            }

            if (!File.Exists(resultFile))
            {
                // Create a file to write to, and write the header.
                using (var sw = File.CreateText(resultFile))
                {
                    sw.WriteLine(string.Join(delimiter, new[] { "ID",
                                                                "R1_Pos", "R1_Cigar", "R1_Dirs",
                                                                "R2_Pos", "R2_Cigar", "R2_Dirs",
                                                                "ShouldStitch", "DidStitch",
                                                                "Exp_SR_Pos", "Exp_SR_Cigar", "Exp_SR_Dirs",
                                                                "Actual_SR_Pos", "Actual_SR_Cigar", "Actual_SR_Dirs",
                                                                "Notes", "Pass", "Message" }));
                }
            }


            using (var sw = File.AppendText(resultFile))
            {
                // Add everything we know from the input scenario, and whether it did stitch.
                var fields = new List <string>()
                {
                    scenario.Category + "-" + scenario.Id,
                    scenario.InputRead1.Position.ToString(), scenario.InputRead1.Cigar, scenario.InputRead1.Directions,
                    scenario.InputRead2.Position.ToString(), scenario.InputRead2.Cigar, scenario.InputRead2.Directions,
                    scenario.ShouldStitch.ToString(), didStitch.ToString(),
                    scenario.OutputRead1.Position.ToString(), scenario.OutputRead1.Cigar, scenario.OutputRead1.Directions,
                };

                var stitchResultsMatch    = false;
                var cigarResultsMatch     = false;
                var directionResultsMatch = false;

                stitchResultsMatch = scenario.ShouldStitch == didStitch;

                // Add the info from the output reads
                if (resultSet != null && resultSet.ReadsForProcessing.Any() && resultSet.ReadsForProcessing.First().CigarDirections != null)
                {
                    var stitchedRead = resultSet.ReadsForProcessing.First();
                    var directions   = GetDirectionsString(stitchedRead);

                    fields.AddRange(new List <string>()
                    {
                        stitchedRead.Position.ToString(),
                        stitchedRead.CigarData.ToString(),
                        directions
                    });

                    // When no stitch was expected there is nothing to compare against, so these count as matching.
                    cigarResultsMatch     = !scenario.ShouldStitch || OutputCigarsMatch(scenario, resultSet);
                    directionResultsMatch = !scenario.ShouldStitch || OutputDirectionsMatch(scenario, resultSet);
                }
                else
                {
                    // No stitched read: leave the three "Actual_SR_*" columns empty.
                    fields.AddRange(new List <string>()
                    {
                        "", "", ""
                    });
                }

                // Determine if this scenario "Passed" (i.e. matched expectations). (TODO if the resultSet is null, it failed -- is that valid?)
                var testResult = (!scenario.ShouldStitch && stitchResultsMatch) || (stitchResultsMatch && cigarResultsMatch && directionResultsMatch);
                fields.Add(Sanitize(scenario.Notes, Convert.ToChar(delimiter)));
                fields.Add(testResult.ToString());

                // NOTE(review): message is written as-is (string.Join renders null as an empty cell);
                // unlike Notes it is not passed through Sanitize, so a delimiter inside it would add columns.
                fields.Add(message);
                // Write scenario results to file
                sw.WriteLine(string.Join(delimiter, fields));
            }
        }
Beispiel #5
0
 /// <summary>
 /// Returns a sub-cigar with one end operation dropped when that end carries a clip:
 /// the first operation for read 1 (when <c>GetPrefixClip()</c> is non-zero), the last
 /// operation otherwise (when <c>GetSuffixClip()</c> is non-zero).
 /// </summary>
 /// <param name="cigar">The alignment to trim.</param>
 /// <param name="isRead1">true to trim at the start of the cigar; false to trim at the end.</param>
 /// <returns>The result of <c>GetSubCigar</c> over the retained operation range.</returns>
 public static CigarAlignment GetCigarWithoutProbeClips(this CigarAlignment cigar, bool isRead1)
 {
     if (isRead1)
     {
         // Read 1: keep everything except a clipped first operation.
         var firstKept = cigar.GetPrefixClip() > 0 ? 1 : 0;
         return cigar.GetSubCigar(firstKept, cigar.Count);
     }

     // Other read: keep everything except a clipped last operation.
     var endKept = cigar.Count - (cigar.GetSuffixClip() > 0 ? 1 : 0);
     return cigar.GetSubCigar(0, endKept);
 }
Beispiel #6
0
 /// <summary>
 /// Returns the cigar's read span with its prefix clip and suffix clip lengths subtracted.
 /// </summary>
 /// <param name="cigar">The alignment to measure.</param>
 /// <returns><c>GetReadSpan() - GetPrefixClip() - GetSuffixClip()</c>.</returns>
 public static uint GetReadSpanBetweenClippedEnds(this CigarAlignment cigar)
 {
     var totalSpan    = cigar.GetReadSpan();
     var leadingClip  = cigar.GetPrefixClip();
     var trailingClip = cigar.GetSuffixClip();

     return totalSpan - leadingClip - trailingClip;
 }
        /// <summary>
        /// Walks the cigar against the reference and read sequences and tallies matches, mismatches,
        /// indels and soft-clips into a new <see cref="AlignmentSummary"/>.
        /// </summary>
        /// <param name="startIndexInReference">Index into <paramref name="refSequence"/> at which the aligned
        /// (non-clipped) part of the read starts. When <paramref name="checkSoftclipsForMismatches"/> is true
        /// it is first shifted left by the cigar's prefix clip length.</param>
        /// <param name="cigarData">Cigar operations to walk; also stored on the returned summary.</param>
        /// <param name="refSequence">Reference bases, indexed by reference position.</param>
        /// <param name="readSequence">Read bases, indexed by read position.</param>
        /// <param name="trackActualMismatches">When true, every counted mismatch that lies inside the reference
        /// is also recorded as a "refIndex_refBase_readBase" string in <c>MismatchesIncludeSoftclip</c>.</param>
        /// <param name="checkSoftclipsForMismatches">When true, counted soft-clipped bases are compared against
        /// the reference and mismatches are added to <c>NumMismatchesIncludeSoftclip</c>.</param>
        /// <param name="probeSoftclipPrefix">Not read by this method.</param>
        /// <param name="probeSoftclipSuffix">Not read by this method.</param>
        /// <returns>
        /// The populated summary, or null when an 'M' operation extends past the end of
        /// <paramref name="refSequence"/>.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// Thrown when an 'M' operation covers a negative reference index.
        /// </exception>
        public static AlignmentSummary GetAlignmentSummary(int startIndexInReference, CigarAlignment cigarData, string refSequence, string readSequence, bool trackActualMismatches = true, bool checkSoftclipsForMismatches = true, int probeSoftclipPrefix = 0, int probeSoftclipSuffix = 0)
        {
            var summary = new AlignmentSummary();

            summary.Cigar = cigarData;

            if (checkSoftclipsForMismatches)
            {
                // Back up so the walk starts on the first soft-clipped base instead of the first aligned base.
                startIndexInReference = startIndexInReference - (int)cigarData.GetPrefixClip();
            }

            var startIndexInRead   = 0;
            var anchorLength       = 0;     // M bases (match or N) seen before the first mismatch/indel
            var endAnchorLength    = 0;     // M bases (match or N) seen since the most recent mismatch/indel
            var hasHitNonMatch     = false;
            var hasHitNonNSoftclip = false; // set by the first non-N soft-clipped base; never reset

            for (var cigarOpIndex = 0; cigarOpIndex < cigarData.Count; cigarOpIndex++)
            {
                var operation = cigarData[cigarOpIndex];
                var opLength  = (int)(operation.Length);
                switch (operation.Type)
                {
                case 'S':     // soft-clip
                    for (var i = 0; i < opLength; i++)
                    {
                        summary.NumSoftclips++;

                        var clippedBase = readSequence[startIndexInRead + i];

                        // No special treatment for Ns that are inside the softclip. Because the whole N-softclip distinction was meant to deal with padding-type softclips, I think.
                        // NOTE(review): the flag carries over between soft-clip operations, so after any non-N
                        // clipped base every later clipped N (including in a suffix clip) is counted as non-N.
                        // Confirm that is intended.
                        if (clippedBase == 'N' && !hasHitNonNSoftclip)
                        {
                            continue;
                        }

                        hasHitNonNSoftclip = true;

                        summary.NumNonNSoftclips++;

                        if (!checkSoftclipsForMismatches)
                        {
                            continue;
                        }

                        var refIndex = startIndexInReference + i;
                        if (refIndex < 0 || refIndex >= refSequence.Length)
                        {
                            // A clipped base hanging off either end of the reference is counted as a
                            // mismatch, but there is no reference base to record for it.
                            summary.NumMismatchesIncludeSoftclip++;
                        }
                        else if (clippedBase != refSequence[refIndex] && clippedBase != 'N')
                        {
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                                var mismatch = string.Format("{0}_{1}_{2}",
                                                             refIndex,
                                                             refSequence[refIndex],
                                                             clippedBase);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }
                        }
                    }
                    break;

                case 'M':     // match or mismatch
                    for (var i = 0; i < opLength; i++)
                    {
                        var refIndex = startIndexInReference + i;
                        if (refIndex > refSequence.Length - 1)
                        {
                            // Read goes off the end of the reference: signalled to the caller with null.
                            return null;
                        }

                        if (refIndex < 0)
                        {
                            throw new InvalidDataException(
                                      "Read would be before beginning of the chromosome: " + startIndexInReference + ":" +
                                      cigarData.ToString() + " vs " + startIndexInReference + " + " + refSequence.Length);
                        }

                        var baseAtIndex = readSequence[startIndexInRead + i];
                        if (baseAtIndex != 'N' && baseAtIndex != refSequence[refIndex])
                        {
                            summary.NumMismatches++;
                            summary.NumMismatchesIncludeSoftclip++;

                            if (trackActualMismatches)
                            {
                                if (summary.MismatchesIncludeSoftclip == null)
                                {
                                    summary.MismatchesIncludeSoftclip = new List<string>();
                                }

                                // TODO WHEN KILL HYGEA, remove this if we're not using anymore, to save time
                                var mismatch = string.Format("{0}_{1}_{2}",
                                                             refIndex,
                                                             refSequence[refIndex],
                                                             baseAtIndex);
                                summary.MismatchesIncludeSoftclip.Add(mismatch);
                            }

                            hasHitNonMatch  = true;
                            endAnchorLength = 0;
                        }
                        else
                        {
                            // N bases are neither matches nor mismatches, but they do not break an anchor.
                            if (baseAtIndex != 'N')
                            {
                                summary.NumMatches++;
                            }

                            if (!hasHitNonMatch)
                            {
                                anchorLength++;
                            }
                            endAnchorLength++;
                        }
                    }
                    break;

                case 'I':     // insertion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases    += opLength;
                    summary.NumInsertedBases += opLength;
                    break;

                case 'D':     // deletion
                    hasHitNonMatch  = true;
                    endAnchorLength = 0;
                    summary.NumIndels++;
                    summary.NumIndelBases   += opLength;
                    summary.NumDeletedBases += opLength;
                    break;
                }

                if (operation.IsReadSpan())
                {
                    startIndexInRead += opLength;
                }

                if (operation.IsReferenceSpan())
                {
                    startIndexInReference += opLength;
                }
                if (checkSoftclipsForMismatches && operation.Type == 'S')
                {
                    // Soft-clips are walked along the reference too when they are being mismatch-checked.
                    startIndexInReference += opLength;
                }
            }

            // The anchor is the shorter of the leading and trailing clean stretches.
            summary.AnchorLength = Math.Min(anchorLength, endAnchorLength);

            return summary;
        }