/// <summary>
/// Reads a BED file into memory as a <see cref="SequenceRangeGrouping"/>.
/// </summary>
/// <remarks>
/// When the <c>verbose</c> flag is set, the processed file name is written to standard
/// error and the parsed ranges are handed to <c>ListSequenceRangeToString</c>.
/// When <c>arguments.normalizeInputs</c> is set, the merged grouping is returned
/// instead of the raw one.
/// </remarks>
/// <param name="filename">Path of the BED file to parse.</param>
/// <returns>The grouping built from the parsed ranges (merged when normalization is requested).</returns>
public static SequenceRangeGrouping ReadBedFile(string filename)
{
    var parser = new BedParser();
    IList<ISequenceRange> listSequenceRange = parser.ParseRange(filename);
    if (verbose)
    {
        Console.Error.WriteLine("Processed File: {0}", filename);
        ListSequenceRangeToString(listSequenceRange);
    }

    var srg = new SequenceRangeGrouping(listSequenceRange);
    if (arguments.normalizeInputs)
    {
        // MergeOverlaps hands back the merged grouping as its return value, so the
        // result has to be kept; calling it as a bare statement threw the merge away.
        // NOTE(review): assumes MergeOverlaps does not also modify the receiver in place - confirm.
        srg = srg.MergeOverlaps(); // could be called Normalize() or Canonicalize()
    }

    return srg;
}
/// <summary>
/// Tests chromosomes with orphan regions: collects the orphan paired reads of the
/// alignment map, merges their regions and compares the "start-end:" listing of the
/// merged ranges with the expected value.
/// </summary>
/// <param name="alignmentMapobj">Sequence alignment map.</param>
private static void TestOrphanRegions(SequenceAlignmentMap alignmentMapobj)
{
    const string expectedOutput = "9437-9447:";

    // Get reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan reads. The Where query is deferred, so materialize it once
    // instead of re-running the filter for each Count() call and for the loop.
    List<PairedRead> orphans = pairedReads
        .Where(pr => pr.PairedType == PairedReadType.Orphan)
        .ToList();
    if (orphans.Count == 0)
    {
        Assert.Fail("No orphan paired reads were found in the alignment map.");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get the sequence range grouping object and merge the overlapping regions.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

    string actualOutput = string.Empty;
    foreach (var range in mergedRegions.GroupRanges)
    {
        actualOutput += range.Start + "-" + range.End + ":";
    }

    Assert.AreEqual(expectedOutput, actualOutput);
}
/// <summary>
/// Validate BED operations (Merge, Intersect, Subtract): runs the requested operation on
/// the reference/query BED files named in the XML node and compares every resulting
/// range with the expected IDs, starts and ends from the same node.
/// </summary>
/// <param name="nodeName">Xml Node name for different inputs.</param>
/// <param name="operationPam">Different Bed operations.</param>
/// <param name="overlappingBasePair">
/// When true, Intersect uses OverlappingPiecesOfIntervals and Subtract uses
/// IntervalsWithNoOverlap; otherwise OverlappingIntervals and
/// NonOverlappingPiecesOfIntervals are used.
/// </param>
/// <param name="isParentSeqRangeRequired">Is Parent Sequence Range required?</param>
private void ValidateBedOperations(string nodeName, BedOperationsParameters operationPam, bool overlappingBasePair, bool isParentSeqRangeRequired)
{
    // Get values from xml.
    string expectedRangeIDs = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.IDNode);
    string expectedStartIndex = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StartNode);
    string expectedEndIndex = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.EndNode);
    string referenceFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode).TestDir();
    string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.QueryFilePath).TestDir();
    string minimalOverlap = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.OverlapValue);

    // Parse the BED files.
    var parserObj = new BedParser();
    SequenceRangeGrouping referenceGroup = parserObj.ParseRangeGrouping(referenceFilePath);
    SequenceRangeGrouping queryGroup = parserObj.ParseRangeGrouping(queryFilePath);

    // Both output types are selected by the same flag.
    var intersectOutputType = overlappingBasePair
        ? IntersectOutputType.OverlappingPiecesOfIntervals
        : IntersectOutputType.OverlappingIntervals;
    var subtractOutputType = overlappingBasePair
        ? SubtractOutputType.IntervalsWithNoOverlap
        : SubtractOutputType.NonOverlappingPiecesOfIntervals;

    SequenceRangeGrouping operationResult = null;
    switch (operationPam)
    {
        case BedOperationsParameters.Merge:
            operationResult = referenceGroup.MergeOverlaps();
            break;
        case BedOperationsParameters.MergeWithPam:
            operationResult = referenceGroup.MergeOverlaps(queryGroup, 0, isParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Intersect:
            operationResult = referenceGroup.Intersect(queryGroup, long.Parse(minimalOverlap, null), intersectOutputType, isParentSeqRangeRequired);
            break;
        case BedOperationsParameters.MergeQueryWithReference:
            operationResult = queryGroup.MergeOverlaps(referenceGroup, 0, isParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Subtract:
            operationResult = referenceGroup.Subtract(queryGroup, long.Parse(minimalOverlap, null), subtractOutputType, isParentSeqRangeRequired);
            break;
        default:
            // Fail with a readable message instead of dereferencing a null result below.
            Assert.Fail("Unsupported BED operation: " + operationPam);
            break;
    }

    string[] expectedRangeIdsArray = expectedRangeIDs.Split(',');
    string[] expectedStartIndexArray = expectedStartIndex.Split(',');
    string[] expectedEndIndexArray = expectedEndIndex.Split(',');

    // Splitting an empty string yields one empty entry, which stands for "no ranges expected".
    int expectedCount = expectedRangeIDs.Length == 0 ? 0 : expectedRangeIdsArray.Length;

    // Walk the result group by group; the expected values are listed in the same order.
    int i = 0;
    foreach (string grpId in operationResult.GroupIDs)
    {
        List<ISequenceRange> rangeList = operationResult.GetGroup(grpId);

        // Validate result sequence range.
        foreach (ISequenceRange range in rangeList)
        {
            Assert.IsTrue(i < expectedCount, "The operation produced more ranges than the " + expectedCount + " expected.");
            Assert.AreEqual(expectedRangeIdsArray[i], range.ID);
            Assert.AreEqual(expectedStartIndexArray[i], range.Start.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedEndIndexArray[i], range.End.ToString((IFormatProvider)null));
            i++;
        }
    }

    // Without this check a result with too few ranges would pass unnoticed.
    Assert.AreEqual(expectedCount, i, "The operation produced fewer ranges than expected.");

    // Validate ParentSeqRange.
    bool result = ValidateParentSeqRange(operationResult, referenceGroup, queryGroup, isParentSeqRangeRequired);
    Assert.IsTrue(result);
    ApplicationLog.WriteLine("Bed Operations BVT: Successfully validated the BED SequenceID, Start and End Ranges");
}
/// <summary>
/// Exercises the MergeOverlaps overloads, first on a single grouping and then on a
/// reference/query pair. Every merge result is formatted to a temporary BED file,
/// compared with its expected BED file and checked with ValidateParentSeqRange.
/// </summary>
public void MergeOperationTest()
{
    string resultfilepath = "tmp_mergeresult.bed";
    BedParser parser = new BedParser();
    BedFormatter formatter = new BedFormatter();

    // ---- Merge within a single grouping ----
    string filepath = @"testdata\BED\Merge\Merge_single.BED";
    SequenceRangeGrouping seqGrouping = parser.ParseRangeGrouping(filepath);

    // Every overload called with a zero minimum overlap shares one expected file.
    // The (0, true) / (0, false) pair is run twice, exactly as in the original sequence.
    string expectedresultpath = @"testdata\BED\Merge\Result_Merge_Single_MinLength0.BED";
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(), resultfilepath, expectedresultpath, seqGrouping, null, 0, false);
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(0, true), resultfilepath, expectedresultpath, seqGrouping, null, 0, true);
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(0, false), resultfilepath, expectedresultpath, seqGrouping, null, 0, false);
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(0), resultfilepath, expectedresultpath, seqGrouping, null, 0, false);
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(0, true), resultfilepath, expectedresultpath, seqGrouping, null, 0, true);
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(0, false), resultfilepath, expectedresultpath, seqGrouping, null, 0, false);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Single_MinLength1.BED";
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(1), resultfilepath, expectedresultpath, seqGrouping, null, 1, false);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Single_MinLength-1.BED";
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(-1), resultfilepath, expectedresultpath, seqGrouping, null, -1, false);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Single_MinLength-3.BED";
    AssertMergeResult(formatter, seqGrouping.MergeOverlaps(-3), resultfilepath, expectedresultpath, seqGrouping, null, -3, false);

    // ---- Merge a reference grouping with a query grouping ----
    string firstFile = @"testdata\BED\Merge\Merge_twofiles_1.BED";
    string secondFile = @"testdata\BED\Merge\Merge_twofiles_2.BED";
    SequenceRangeGrouping refSeqRange = parser.ParseRangeGrouping(firstFile);
    SequenceRangeGrouping querySeqRange = parser.ParseRangeGrouping(secondFile);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength0.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, false);
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 0, false), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, false);
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 0, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, true);
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 0), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, false);
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 0, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, true);
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 0, false), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 0, false);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength1.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 1, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 1, true);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength-1.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, -1, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, -1, true);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength-3.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, -3, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, -3, true);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength2.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 2, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 2, true);

    expectedresultpath = @"testdata\BED\Merge\Result_Merge_Two_MinLength6.BED";
    AssertMergeResult(formatter, refSeqRange.MergeOverlaps(querySeqRange, 6, true), resultfilepath, expectedresultpath, refSeqRange, querySeqRange, 6, true);
}

/// <summary>
/// Formats one merge result to <paramref name="resultPath"/>, asserts that the file
/// matches <paramref name="expectedPath"/> and asserts that ValidateParentSeqRange
/// accepts the result.
/// </summary>
/// <param name="formatter">Formatter used to write the result.</param>
/// <param name="result">Merge result under test.</param>
/// <param name="resultPath">File the result is written to.</param>
/// <param name="expectedPath">BED file holding the expected output.</param>
/// <param name="reference">Grouping the merge was invoked on.</param>
/// <param name="query">Second grouping of the merge, or null for a single-grouping merge.</param>
/// <param name="minOverlap">Minimum overlap value forwarded to ValidateParentSeqRange.</param>
/// <param name="isParentSeqRangeRequired">Parent-range flag forwarded to ValidateParentSeqRange.</param>
private void AssertMergeResult(BedFormatter formatter, SequenceRangeGrouping result, string resultPath, string expectedPath, SequenceRangeGrouping reference, SequenceRangeGrouping query, int minOverlap, bool isParentSeqRangeRequired)
{
    formatter.Format(result, resultPath);

    bool outputMatches = CompareBEDOutput(resultPath, expectedPath);
    Assert.IsTrue(outputMatches, "Merge output does not match " + expectedPath);

    bool parentsValid = ValidateParentSeqRange(result, reference, query, minOverlap, isParentSeqRangeRequired);
    Assert.IsTrue(parentsValid, "Parent sequence range validation failed for " + expectedPath + " (minOverlap=" + minOverlap + ", isParentSeqRangeRequired=" + isParentSeqRangeRequired + ")");
}
/// <summary>
/// Displays the regions covered by orphan reads of an alignment file, followed by the
/// regions obtained by merging the overlapping ones ("chromosomal hot spots").
/// </summary>
/// <param name="filename">
/// Path of the alignment file. It is parsed with SAMParser when <c>SAMInput</c> is set
/// and with BAMParser otherwise.
/// </param>
private void DisplayOrphans(string filename)
{
    SequenceAlignmentMap alignmentMapobj = null;

    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // Get reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan reads. The Where query is deferred, so materialize it once
    // rather than filtering again for the count and for the region projection.
    List<PairedRead> orphans = pairedReads
        .Where(pr => pr.PairedType == PairedReadType.Orphan)
        .ToList();
    int count = orphans.Count;
    if (count == 0)
    {
        // NOTE(review): execution continues after this message, so the two
        // "nothing to display" messages below are printed as well - confirm
        // whether an early return was intended.
        Console.WriteLine("No Orphans to display");
    }

    var orphanRegions = new List<ISequenceRange>(count);
    orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

    // Get sequence range grouping object.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

    if (!rangeGroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("Region of Orphan reads:");
        DisplaySequenceRange(rangeGroup);
    }

    // Merge overlapping orphan regions and display the merged ranges.
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

    if (!mergedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo hot spots to display");
    }
    else
    {
        Console.Write("\r\nChromosomal hot spot:");
        DisplaySequenceRange(mergedRegions);
    }
}
/// <summary>
/// Validate BED operations (Merge, Intersect, Subtract): runs the requested operation on
/// the reference/query BED files named in the XML node and compares the comma-separated
/// IDs, starts and ends of the resulting ranges with the expected values of that node.
/// </summary>
/// <param name="nodeName">Xml Node name for different inputs.</param>
/// <param name="operationPam">Different Bed operations.</param>
/// <param name="overlappingBasePair">
/// When true, Intersect uses OverlappingPiecesOfIntervals and Subtract uses
/// IntervalsWithNoOverlap; otherwise OverlappingIntervals and
/// NonOverlappingPiecesOfIntervals are used.
/// </param>
/// <param name="IsParentSeqRangeRequired">Is Parent Sequence Range required?</param>
static void ValidateBedOperations(string nodeName, BedOperationsParameters operationPam, bool overlappingBasePair, bool IsParentSeqRangeRequired)
{
    // Get values from xml.
    string expectedRangeIDs = Utility._xmlUtil.GetTextValue(nodeName, Constants.IDNode);
    string expectedStartIndex = Utility._xmlUtil.GetTextValue(nodeName, Constants.StartNode);
    string expectedEndIndex = Utility._xmlUtil.GetTextValue(nodeName, Constants.EndNode);
    string referenceFilePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string queryFilePath = Utility._xmlUtil.GetTextValue(nodeName, Constants.QueryFilePath);
    string minimalOverlap = Utility._xmlUtil.GetTextValue(nodeName, Constants.OverlapValue);

    // Parse the BED files.
    BedParser parserObj = new BedParser();
    SequenceRangeGrouping referenceGroup = parserObj.ParseRangeGrouping(referenceFilePath);
    SequenceRangeGrouping queryGroup = parserObj.ParseRangeGrouping(queryFilePath);

    // Both output types are selected by the same flag.
    IntersectOutputType intersectOutputType = overlappingBasePair
        ? IntersectOutputType.OverlappingPiecesOfIntervals
        : IntersectOutputType.OverlappingIntervals;
    SubtractOutputType subtractOutputType = overlappingBasePair
        ? SubtractOutputType.IntervalsWithNoOverlap
        : SubtractOutputType.NonOverlappingPiecesOfIntervals;

    SequenceRangeGrouping operationResult = null;
    switch (operationPam)
    {
        case BedOperationsParameters.Merge:
            operationResult = referenceGroup.MergeOverlaps();
            break;
        case BedOperationsParameters.MergeWithPam:
            operationResult = referenceGroup.MergeOverlaps(queryGroup, 0, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Intersect:
            operationResult = referenceGroup.Intersect(queryGroup, long.Parse(minimalOverlap), intersectOutputType, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.MergeQueryWithReference:
            operationResult = queryGroup.MergeOverlaps(referenceGroup, 0, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Subtract:
            operationResult = referenceGroup.Subtract(queryGroup, long.Parse(minimalOverlap), subtractOutputType, IsParentSeqRangeRequired);
            break;
        default:
            // Fail with a readable message instead of dereferencing a null result below.
            Assert.Fail("Unsupported BED operation: " + operationPam);
            break;
    }

    // Collect the actual values per range. Joining the lists afterwards avoids the
    // trailing-comma Substring, which threw when the result contained no ranges.
    List<string> actualIDs = new List<string>();
    List<string> actualStarts = new List<string>();
    List<string> actualEnds = new List<string>();
    foreach (string grpID in operationResult.GroupIDs)
    {
        List<ISequenceRange> rangeList = operationResult.GetGroup(grpID);
        foreach (ISequenceRange range in rangeList)
        {
            actualStarts.Add(range.Start.ToString());
            actualEnds.Add(range.End.ToString());
            actualIDs.Add(range.ID.ToString());
        }
    }

    // Validate result sequence ranges.
    Assert.AreEqual(expectedRangeIDs, string.Join(",", actualIDs.ToArray()));
    Assert.AreEqual(expectedStartIndex, string.Join(",", actualStarts.ToArray()));
    Assert.AreEqual(expectedEndIndex, string.Join(",", actualEnds.ToArray()));

    // Validate ParentSeqRange.
    bool result = ValidateParentSeqRange(operationResult, referenceGroup, queryGroup, IsParentSeqRangeRequired);
    Assert.IsTrue(result);
    ApplicationLog.WriteLine(
        "Bed Parser BVT: Successfully validated the BED SequenceID, Start and End Ranges");
}
/// <summary>
/// Validate Merge SequenceRangeGrouping: merges the reference BED file of the XML node
/// (optionally together with the query BED file) and compares every merged range with
/// the expected IDs, starts and ends from the same node.
/// </summary>
/// <param name="nodeName">Xml Node name for different inputs.</param>
/// <param name="IsMergePam">
/// When true the reference grouping is merged with the query grouping; otherwise the
/// reference grouping is merged on its own.
/// </param>
/// <param name="IsParentSeqRangesRequired">Is Parent Sequence Range required?</param>
void MergeSequenceRange(string nodeName, bool IsMergePam, bool IsParentSeqRangesRequired)
{
    // Get values from xml.
    string expectedIdText = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.IDNode);
    string[] expectedRangeIDs = expectedIdText.Split(',');
    string[] expectedStartIndex = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.StartNode).Split(',');
    string[] expectedEndIndex = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.EndNode).Split(',');
    string filePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string queryFilePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.QueryFilePath);

    // Splitting an empty string yields one empty entry, which stands for "no ranges expected".
    int expectedCount = expectedIdText.Length == 0 ? 0 : expectedRangeIDs.Length;

    // Parse the BED files.
    BedParser parserObj = new BedParser();
    SequenceRangeGrouping referenceGroup = parserObj.ParseRangeGrouping(filePath);
    SequenceRangeGrouping queryGroup = parserObj.ParseRangeGrouping(queryFilePath);

    // Merge a SequenceRangeGroup.
    SequenceRangeGrouping mergedGroup = IsMergePam
        ? referenceGroup.MergeOverlaps(queryGroup, 0, IsParentSeqRangesRequired)
        : referenceGroup.MergeOverlaps();

    // Walk the merged result group by group; the expected values are listed in the same order.
    int j = 0;
    foreach (string grpID in mergedGroup.GroupIDs)
    {
        List<ISequenceRange> rangeList = mergedGroup.GetGroup(grpID);

        // Validate merged sequence range.
        foreach (ISequenceRange range in rangeList)
        {
            Assert.IsTrue(j < expectedCount, "The merge produced more ranges than the " + expectedCount + " expected.");
            Assert.AreEqual(expectedStartIndex[j], range.Start.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedEndIndex[j], range.End.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedRangeIDs[j], range.ID.ToString((IFormatProvider)null));
            j++;
        }
    }

    // Without this check a merged result with too few ranges would pass unnoticed.
    Assert.AreEqual(expectedCount, j, "The merge produced fewer ranges than expected.");

    // Validate Parent SequenceRanges.
    bool result = ValidateParentSeqRange(mergedGroup, referenceGroup, queryGroup, IsParentSeqRangesRequired);
    Assert.IsTrue(result);

    // The message has no format items, so it is written directly without string.Format.
    ApplicationLog.WriteLine(
        "Bed Parser BVT: Successfully validated the merged SequeID, Start and End Ranges");
    Console.WriteLine(
        "Bed Parser BVT: Successfully validated the merged SequeID, Start and End Ranges");
}