/// <summary>
/// Pools the ranges of this grouping with the ranges of the query grouping and
/// merges the pooled ranges.
///
/// For example,
///
///  Ranges in this instance     Ranges in the query
///   3 to 15                      4 to 10
///   5 to 18                     11 to 20
///
///  Result for minOverlap set to 1
///   3 to 20
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query sequence ranges.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap; forwarded unchanged
/// to the (minOverlap, isParentSeqRangesRequired) overload.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The merged sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
    }

    // Collect the ranges of this instance first, then the query's, into a single flat list.
    List<ISequenceRange> pooledRanges = new List<ISequenceRange>();
    pooledRanges.AddRange(this.Flatten());
    pooledRanges.AddRange(query.Flatten());

    // The actual merging is done by the single-grouping overload on the pooled grouping.
    SequenceRangeGrouping pooledGrouping = new SequenceRangeGrouping(pooledRanges);
    return pooledGrouping.MergeOverlaps(minOverlap, isParentSeqRangesRequired);
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance     Ranges in the query
///   1 to 4                       2 to 6
///   4 to 8                       3 to 6
///   8 to 12                      9 to 14
///   25 to 35
///
///  Result for minOverlap set to 1
///   1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///   2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///       25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Right-hand remainder pieces created so far for the current reference range;
    // a later query range may trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // Keep the caller's un-merged query ranges: they decide below whether a merged query
    // range is recorded as a parent itself or through its own ParentSeqRanges.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();

                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing to subtract from this reference range: copy it through unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // True for the first overlapping query range and for any query range whose
                    // start and end both differ from the previous one. Neither operand changes
                    // during this iteration, so the test is evaluated once.
                    bool isNewQueryRange = previousOverlappingRange == null
                        || (previousOverlappingRange.Start != queryRange.Start
                            && previousOverlappingRange.End != queryRange.End);

                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // A remainder piece from an earlier query range reaches into this
                            // query range: trim it, and drop pieces that end before it starts.
                            if (isNewQueryRange)
                            {
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                }
                            }
                        }
                        else
                        {
                            // Piece of the reference range lying to the left of the query range.
                            if (isNewQueryRange)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        // Piece of the reference range lying to the right of the query range;
                        // remembered so that a following query range can trim it.
                        if (isNewQueryRange)
                        {
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else if (queryRange.ParentSeqRanges.Count > 0)
                            {
                                AddParent(range, queryRange.ParentSeqRanges);
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group: every reference range is copied through.
            // Iterate as ISequenceRange (the list's element type) so that implementations other
            // than SequenceRange do not fail on a hidden cast.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns, for each group in this grouping, the sequence ranges of this instance that
/// overlap with the ranges of the specified query grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  Ranges in this instance     Ranges in the query
///   0 to 10                      20 to 40
///   30 to 50                     70 to 100
///   60 to 80                     400 to 800
///   300 to 500                   850 to 900
///   600 to 700                   900 to 1200
///   800 to 1000
///
///  Result for minOverlap set to 1
///   1. If outputType is OverlappingPiecesOfIntervals
///       30 to 40
///       70 to 80
///       400 to 500
///       600 to 700
///       850 to 900
///       900 to 1000
///   2. If outputType is OverlappingIntervals
///       30 to 50
///       60 to 80
///       300 to 500
///       600 to 700
///       800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs
/// that overlap with query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping intersection = new SequenceRangeGrouping();
    List<ISequenceRange> referenceRanges = new List<ISequenceRange>();
    List<ISequenceRange> sortedQueryRanges = new List<ISequenceRange>();
    SequenceRange current = null;
    bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;

    // Snapshot of the un-merged query ranges, taken only when parent tracking is on.
    IList<ISequenceRange> originalQueryRanges = null;
    if (isParentSeqRangesRequired)
    {
        originalQueryRanges = query.Flatten();
    }

    // From here on work against the merged query.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        referenceRanges.Clear();
        sortedQueryRanges.Clear();
        referenceRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            sortedQueryRanges.AddRange(query.groups[id]);
            sortedQueryRanges.Sort();
        }

        // A group without query ranges contributes nothing to the result.
        if (sortedQueryRanges.Count == 0)
        {
            continue;
        }

        foreach (ISequenceRange refRange in referenceRanges)
        {
            IList<ISequenceRange> hits = GetOverlappingRenges(refRange, sortedQueryRanges, minOverlap);

            if (hits == null || hits.Count == 0)
            {
                // With a non-positive minOverlap and whole intervals requested,
                // the reference range is still reported.
                if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                {
                    current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                    CopyOfMetadata(current, refRange);
                    intersection.Add(current);

                    if (isParentSeqRangesRequired)
                    {
                        AddParent(current, refRange);
                    }
                }

                continue;
            }

            ISequenceRange previousHit = null;
            foreach (ISequenceRange queryRange in hits)
            {
                // Pieces mode: a new output range for the first hit and for every hit whose
                // start and end both differ from the previous hit.
                // Intervals mode: a single output range, created on the first hit only.
                bool startNewRange;
                if (piecesRequested)
                {
                    startNewRange = previousHit == null
                        || (previousHit.Start != queryRange.Start && previousHit.End != queryRange.End);
                }
                else
                {
                    startNewRange = previousHit == null;
                }

                if (startNewRange)
                {
                    if (piecesRequested)
                    {
                        current = new SequenceRange(
                            refRange.ID,
                            Math.Max(queryRange.Start, refRange.Start),
                            Math.Min(queryRange.End, refRange.End));
                        intersection.Add(current);
                        CopyOfMetadata(current, refRange);
                    }
                    else
                    {
                        current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(current, refRange);
                        intersection.Add(current);
                    }

                    if (isParentSeqRangesRequired)
                    {
                        AddParent(current, refRange);
                    }
                    else if (!piecesRequested)
                    {
                        // Whole interval already emitted and no parents to collect:
                        // the remaining hits cannot add anything.
                        break;
                    }
                }

                if (isParentSeqRangesRequired)
                {
                    if (originalQueryRanges.Contains(queryRange))
                    {
                        AddParent(current, queryRange);
                    }
                    else if (queryRange.ParentSeqRanges.Count > 0)
                    {
                        AddParent(current, queryRange.ParentSeqRanges);
                    }
                }

                previousHit = queryRange;
            }
        }
    }

    return intersection;
}
/// <summary>
/// Pools the ranges of this grouping with the ranges of the query grouping and
/// merges the pooled ranges.
///
/// For example,
///
///  Ranges in this instance     Ranges in the query
///   3 to 15                      4 to 10
///   5 to 18                     11 to 20
///
///  Result for minOverlap set to 1
///   3 to 20
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query sequence ranges.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap; forwarded unchanged
/// to the (minOverlap, isParentSeqRangesRequired) overload.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The merged sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
    }

    // Collect the ranges of this instance first, then the query's, into a single flat list.
    List<ISequenceRange> pooledRanges = new List<ISequenceRange>();
    pooledRanges.AddRange(this.Flatten());
    pooledRanges.AddRange(query.Flatten());

    // The actual merging is done by the single-grouping overload on the pooled grouping.
    SequenceRangeGrouping pooledGrouping = new SequenceRangeGrouping(pooledRanges);
    return pooledGrouping.MergeOverlaps(minOverlap, isParentSeqRangesRequired);
}
/// <summary>
/// Formats and writes the query region (output of Merge/Subtract/Intersect operations)
/// to a new worksheet appended to the given workbook. When the result grouping has no
/// groups, an informational message box is shown instead and nothing is written.
/// </summary>
/// <param name="resultWorkbook">Workbook to which the range has to be written.</param>
/// <param name="resultSheetname">New worksheet name.</param>
/// <param name="resultGroup">Output group.</param>
/// <param name="groupsData">
/// Complete input groups information.
/// Contains individual group, sheet and addresses of ISequenceRange.
/// </param>
/// <param name="showMetadata">When true, the values extracted for every entry of
/// rangeHeaders from index 3 onwards are written after the ID, start and end columns.</param>
/// <param name="showBasePairCount">When true, base pair totals (sum of End - Start) are written
/// next to the range counts, and the last column receives the base pairs common to the
/// accumulated reference and query groups (0 when either of them is missing).</param>
private void WriteSequenceRange(
    Workbook resultWorkbook,
    string resultSheetname,
    SequenceRangeGrouping resultGroup,
    Dictionary<SequenceRangeGrouping, GroupData> groupsData,
    bool showMetadata,
    bool showBasePairCount)
{
    // Any() stops at the first element instead of counting the whole sequence.
    if (!resultGroup.GroupIDs.Any())
    {
        MessageBox.Show(
            Resources.NO_RESULT,
            Resources.CAPTION,
            MessageBoxButtons.OK,
            MessageBoxIcon.Information);
        return;
    }

    int baseRowIndex = 2, baseColumnIndex = 2;
    int totalColumnCount = 0;
    SequenceRangeGrouping sheetGroup = null;
    int sheetCount = 0;

    // Append a new worksheet after the last one, activate it and name it.
    var resultWorksheet = resultWorkbook.Worksheets.Add(
        Type.Missing,
        resultWorkbook.Worksheets.get_Item(resultWorkbook.Worksheets.Count),
        Type.Missing,
        Type.Missing) as Worksheet;
    ((_Worksheet)resultWorksheet).Activate();
    Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
    resultWorksheet.Name = resultSheetname;

    // Flatten the per-group metadata dictionaries into one range -> text lookup.
    // The text is split on ',' below to build the hyperlinks.
    Dictionary<ISequenceRange, string> rangedata = groupsData.Values
        .Select(gd => gd.Metadata)
        .SelectMany(sd => sd.Values)
        .SelectMany(rd => rd)
        .ToDictionary(k => k.Key, v => v.Value);

    var groupSheetIndices = new Dictionary<SequenceRangeGrouping, Dictionary<string, int>>();
    baseRowIndex = this.WriteSequenceRangeHeader(
        resultWorksheet,
        groupSheetIndices,
        groupsData,
        baseRowIndex,
        baseColumnIndex,
        ref totalColumnCount,
        showMetadata,
        showBasePairCount);
    totalColumnCount -= (baseColumnIndex - 1);

    foreach (string resultGroupKey in resultGroup.GroupIDs)
    {
        List<ISequenceRange> resultSequenceRanges = resultGroup.GetGroup(resultGroupKey);
        int dataRowIndex = 0;

        // One row per result range; the whole block is written to the sheet in one call.
        object[,] values = new object[resultSequenceRanges.Count, totalColumnCount];
        Range activeRange = resultWorksheet.get_Range(
            GetColumnString(baseColumnIndex) + baseRowIndex,
            Missing.Value);
        activeRange = activeRange.get_Resize(resultSequenceRanges.Count, totalColumnCount);

        foreach (ISequenceRange resultSequenceRange in resultSequenceRanges)
        {
            SequenceRangeGrouping referenceGroup = null;
            SequenceRangeGrouping queryGroup = null;
            int dataColumnIndex = 0;
            var allSheetData = new Dictionary<SequenceRangeGrouping, SequenceRangeGrouping>();
            var allSheetCount = new Dictionary<SequenceRangeGrouping, int>();

            values[dataRowIndex, dataColumnIndex++] = resultSequenceRange.ID;
            values[dataRowIndex, dataColumnIndex++] = resultSequenceRange.Start;
            values[dataRowIndex, dataColumnIndex++] = resultSequenceRange.End;

            if (showMetadata)
            {
                for (int index = 3; index < rangeHeaders.Count; index++)
                {
                    values[dataRowIndex, dataColumnIndex++] = ExtractRangeMetadata(
                        resultSequenceRange,
                        rangeHeaders[index]);
                }
            }

            Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>> columnData =
                PrepareSequenceRowRange(groupsData, groupSheetIndices, rangedata, resultSequenceRange);

            foreach (var columnGroup in columnData)
            {
                SequenceRangeGrouping owningGroup = columnGroup.Value.Item1;
                List<ISequenceRange> parentRanges = columnGroup.Value.Item3;
                SequenceRangeGrouping groupToMerge = null;

                if (showBasePairCount)
                {
                    // Get the parent ranges for Group's range in a column
                    groupToMerge = new SequenceRangeGrouping(parentRanges);
                    if (1 < parentRanges.Count)
                    {
                        groupToMerge = groupToMerge.MergeOverlaps(0, false);
                    }

                    // Render data for Group's range in a column
                    values[dataRowIndex, columnGroup.Key] =
                        groupToMerge.GroupRanges.Sum(sr => sr.End - sr.Start);
                    values[dataRowIndex, columnGroup.Key + 1] = parentRanges.Count;
                }
                else
                {
                    values[dataRowIndex, columnGroup.Key] = parentRanges.Count;
                }

                // Collect the hyperlink targets of every parent range of this cell.
                List<string> hyperlinks = new List<string>();
                foreach (ISequenceRange range in parentRanges)
                {
                    hyperlinks.AddRange(rangedata[range].Split(','));
                }

                this.ShowHyperlink(
                    hyperlinks,
                    activeRange,
                    columnGroup.Key,
                    dataRowIndex,
                    showBasePairCount);

                if (showBasePairCount)
                {
                    // Calculate data for all group
                    if (allSheetData.TryGetValue(owningGroup, out sheetGroup))
                    {
                        allSheetData[owningGroup] = sheetGroup.MergeOverlaps(groupToMerge, 0, false);
                    }
                    else
                    {
                        allSheetData[owningGroup] = groupToMerge;
                    }

                    // Build up reference & query groups (later get common range using this).
                    // Item2 selects which of the two accumulators the column feeds.
                    if (columnGroup.Value.Item2)
                    {
                        referenceGroup = null == referenceGroup
                            ? groupToMerge
                            : referenceGroup.MergeOverlaps(groupToMerge, 0, false);
                    }
                    else
                    {
                        queryGroup = null == queryGroup
                            ? groupToMerge
                            : queryGroup.MergeOverlaps(groupToMerge, 0, false);
                    }
                }

                // Calculate range count for all group
                if (allSheetCount.TryGetValue(owningGroup, out sheetCount))
                {
                    allSheetCount[owningGroup] = sheetCount + parentRanges.Count;
                }
                else
                {
                    allSheetCount[owningGroup] = parentRanges.Count;
                }
            }

            // Render all columns in SequenceRangeGrouping: the summary cells are placed
            // immediately before the group's first sheet column.
            foreach (var allData in allSheetCount)
            {
                dataColumnIndex = groupSheetIndices[allData.Key].Values.Min() - (showBasePairCount ? 2 : 1);
                if (showBasePairCount)
                {
                    values[dataRowIndex, dataColumnIndex] =
                        allSheetData[allData.Key].GroupRanges.Sum(sr => sr.End - sr.Start);
                    dataColumnIndex++;
                }

                values[dataRowIndex, dataColumnIndex] = allData.Value;
            }

            if (showBasePairCount)
            {
                // Render common column in SequenceRangeGrouping
                if (null != referenceGroup && null != queryGroup)
                {
                    referenceGroup = referenceGroup.Intersect(
                        queryGroup,
                        0,
                        IntersectOutputType.OverlappingPiecesOfIntervals,
                        false);
                    values[dataRowIndex, totalColumnCount - 1] =
                        referenceGroup.GroupRanges.Sum(sr => sr.End - sr.Start);
                }
                else
                {
                    values[dataRowIndex, totalColumnCount - 1] = 0;
                }
            }

            dataRowIndex++;
        }

        activeRange.set_Value(Missing.Value, values);
        baseRowIndex += dataRowIndex;
    }

    resultWorksheet.Columns.AutoFit();
    this.NormalizeColumWidths(resultWorksheet.UsedRange);
    this.EnableAllControls();
}
CreateSequenceRangeGroupingsForVennDiagram(
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB,
    SequenceRangeGrouping srgC,
    out SequenceRangeGrouping srgOnly_A,
    out SequenceRangeGrouping srgOnly_B,
    out SequenceRangeGrouping srgOnly_C,
    out SequenceRangeGrouping srgOnly_AB,
    out SequenceRangeGrouping srgOnly_AC,
    out SequenceRangeGrouping srgOnly_BC,
    out SequenceRangeGrouping srgOnly_ABC)
{
    // Builds the seven regions of a 3 circle Venn diagram from the groupings A, B and C.
    // Every Subtract / Intersect call uses a minimum overlap of 1 and the "pieces" output type.
    const long minOverlap = 1;
    const SubtractOutputType subtractPieces = SubtractOutputType.NonOverlappingPiecesOfIntervals;
    const IntersectOutputType intersectPieces = IntersectOutputType.OverlappingPiecesOfIntervals;

    // Exclusive regions: each set minus the merge of the other two.
    SequenceRangeGrouping mergedBC = srgB.MergeOverlaps(srgC);
    srgOnly_A = srgA.Subtract(mergedBC, minOverlap, subtractPieces);

    SequenceRangeGrouping mergedAC = srgA.MergeOverlaps(srgC);
    srgOnly_B = srgB.Subtract(mergedAC, minOverlap, subtractPieces);

    SequenceRangeGrouping mergedAB = srgA.MergeOverlaps(srgB);
    srgOnly_C = srgC.Subtract(mergedAB, minOverlap, subtractPieces);

    // Pairwise regions: the intersection of two sets minus the third set.
    SequenceRangeGrouping commonAB = srgA.Intersect(srgB, minOverlap, intersectPieces);
    srgOnly_AB = commonAB.Subtract(srgC, minOverlap, subtractPieces);

    SequenceRangeGrouping commonAC = srgA.Intersect(srgC, minOverlap, intersectPieces);
    srgOnly_AC = commonAC.Subtract(srgB, minOverlap, subtractPieces);

    SequenceRangeGrouping commonBC = srgB.Intersect(srgC, minOverlap, intersectPieces);
    srgOnly_BC = commonBC.Subtract(srgA, minOverlap, subtractPieces);

    // Centre region: what A and B share, intersected with C.
    srgOnly_ABC = commonAB.Intersect(srgC, minOverlap, intersectPieces);
}
CreateSequenceRangeGroupingsForVennDiagram(
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB,
    out SequenceRangeGrouping srgOnly_A,
    out SequenceRangeGrouping srgOnly_B,
    out SequenceRangeGrouping srgOnly_AB)
{
    // Builds the three regions of a 2 circle Venn diagram from the groupings A and B.
    const long minOverlap = 1;
    const SubtractOutputType subtractPieces = SubtractOutputType.NonOverlappingPiecesOfIntervals;

    // Merge of A and B (Union in set terminology, Or in boolean logic).
    SequenceRangeGrouping mergedAB = srgA.MergeOverlaps(srgB);

    // The exclusive regions are taken from the merged grouping by removing the other set.
    // TODO: Subtract and Intersect should use same 'logic' (for bool 3rd arg)
    srgOnly_A = mergedAB.Subtract(srgB, minOverlap, subtractPieces);
    srgOnly_B = mergedAB.Subtract(srgA, minOverlap, subtractPieces);

    // The shared region comes straight from intersecting the two inputs.
    srgOnly_AB = srgA.Intersect(srgB, minOverlap, IntersectOutputType.OverlappingPiecesOfIntervals);
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance     Ranges in the query
///   1 to 4                       2 to 6
///   4 to 8                       3 to 6
///   8 to 12                      9 to 14
///   25 to 35
///
///  Result for minOverlap set to 1
///   1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///   2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///       25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Right-hand remainder pieces created so far for the current reference range;
    // a later query range may trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // Keep the caller's un-merged query ranges: they decide below whether a merged query
    // range is recorded as a parent itself or through its own ParentSeqRanges.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();

                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing to subtract from this reference range: copy it through unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // True for the first overlapping query range and for any query range whose
                    // start and end both differ from the previous one. Neither operand changes
                    // during this iteration, so the test is evaluated once.
                    bool isNewQueryRange = previousOverlappingRange == null
                        || (previousOverlappingRange.Start != queryRange.Start
                            && previousOverlappingRange.End != queryRange.End);

                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // A remainder piece from an earlier query range reaches into this
                            // query range: trim it, and drop pieces that end before it starts.
                            if (isNewQueryRange)
                            {
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                }
                            }
                        }
                        else
                        {
                            // Piece of the reference range lying to the left of the query range.
                            if (isNewQueryRange)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        // Piece of the reference range lying to the right of the query range;
                        // remembered so that a following query range can trim it.
                        if (isNewQueryRange)
                        {
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else if (queryRange.ParentSeqRanges.Count > 0)
                            {
                                AddParent(range, queryRange.ParentSeqRanges);
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group: every reference range is copied through.
            // Iterate as ISequenceRange (the list's element type) so that implementations other
            // than SequenceRange do not fail on a hidden cast.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns, for each group in this grouping, the sequence ranges of this instance that
/// overlap with the ranges of the specified query grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  Ranges in this instance     Ranges in the query
///   0 to 10                      20 to 40
///   30 to 50                     70 to 100
///   60 to 80                     400 to 800
///   300 to 500                   850 to 900
///   600 to 700                   900 to 1200
///   800 to 1000
///
///  Result for minOverlap set to 1
///   1. If outputType is OverlappingPiecesOfIntervals
///       30 to 40
///       70 to 80
///       400 to 500
///       600 to 700
///       850 to 900
///       900 to 1000
///   2. If outputType is OverlappingIntervals
///       30 to 50
///       60 to 80
///       300 to 500
///       600 to 700
///       800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs
/// that overlap with query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping intersection = new SequenceRangeGrouping();
    List<ISequenceRange> referenceRanges = new List<ISequenceRange>();
    List<ISequenceRange> sortedQueryRanges = new List<ISequenceRange>();
    SequenceRange current = null;
    bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;

    // Snapshot of the un-merged query ranges, taken only when parent tracking is on.
    IList<ISequenceRange> originalQueryRanges = null;
    if (isParentSeqRangesRequired)
    {
        originalQueryRanges = query.Flatten();
    }

    // From here on work against the merged query.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        referenceRanges.Clear();
        sortedQueryRanges.Clear();
        referenceRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            sortedQueryRanges.AddRange(query.groups[id]);
            sortedQueryRanges.Sort();
        }

        // A group without query ranges contributes nothing to the result.
        if (sortedQueryRanges.Count == 0)
        {
            continue;
        }

        foreach (ISequenceRange refRange in referenceRanges)
        {
            IList<ISequenceRange> hits = GetOverlappingRenges(refRange, sortedQueryRanges, minOverlap);

            if (hits == null || hits.Count == 0)
            {
                // With a non-positive minOverlap and whole intervals requested,
                // the reference range is still reported.
                if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                {
                    current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                    CopyOfMetadata(current, refRange);
                    intersection.Add(current);

                    if (isParentSeqRangesRequired)
                    {
                        AddParent(current, refRange);
                    }
                }

                continue;
            }

            ISequenceRange previousHit = null;
            foreach (ISequenceRange queryRange in hits)
            {
                // Pieces mode: a new output range for the first hit and for every hit whose
                // start and end both differ from the previous hit.
                // Intervals mode: a single output range, created on the first hit only.
                bool startNewRange;
                if (piecesRequested)
                {
                    startNewRange = previousHit == null
                        || (previousHit.Start != queryRange.Start && previousHit.End != queryRange.End);
                }
                else
                {
                    startNewRange = previousHit == null;
                }

                if (startNewRange)
                {
                    if (piecesRequested)
                    {
                        current = new SequenceRange(
                            refRange.ID,
                            Math.Max(queryRange.Start, refRange.Start),
                            Math.Min(queryRange.End, refRange.End));
                        intersection.Add(current);
                        CopyOfMetadata(current, refRange);
                    }
                    else
                    {
                        current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(current, refRange);
                        intersection.Add(current);
                    }

                    if (isParentSeqRangesRequired)
                    {
                        AddParent(current, refRange);
                    }
                    else if (!piecesRequested)
                    {
                        // Whole interval already emitted and no parents to collect:
                        // the remaining hits cannot add anything.
                        break;
                    }
                }

                if (isParentSeqRangesRequired)
                {
                    if (originalQueryRanges.Contains(queryRange))
                    {
                        AddParent(current, queryRange);
                    }
                    else if (queryRange.ParentSeqRanges.Count > 0)
                    {
                        AddParent(current, queryRange.ParentSeqRanges);
                    }
                }

                previousHit = queryRange;
            }
        }
    }

    return intersection;
}
/// <summary>
/// Parses the given alignment file, picks the paired reads whose type is Orphan and
/// writes to the console the regions of those reads followed by the merged regions
/// (chromosomal hot spots).
/// </summary>
/// <param name="filename">Path of the alignment file. It is parsed with SAMParser when
/// SAMInput is set and with BAMParser otherwise.</param>
private void DisplayOrphans(string filename)
{
    SequenceAlignmentMap alignmentMapobj = null;

    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan regions. The filter is materialized once so that counting the
    // orphans and projecting them to regions do not each re-run the query.
    var orphans = pairedReads.Where(PR => PR.PairedType == PairedReadType.Orphan).ToList();
    int count = orphans.Count;

    // NOTE(review): execution continues after this message, so an empty result also
    // prints the two "No ..." messages below - confirm whether an early return is wanted.
    if (count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    var orphanRegions = new List<ISequenceRange>(count);
    orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

    // Get sequence range grouping object.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

    if (!rangeGroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("Region of Orphan reads:");
        DisplaySequenceRange(rangeGroup);
    }

    // Merging the orphan regions yields the hot spots.
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

    if (!mergedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo hot spots to display");
    }
    else
    {
        Console.Write("\r\nChromosomal hot spot:");
        DisplaySequenceRange(mergedRegions);
    }
}
/// <summary>
/// Merges two sequence range groupings with default parameters.
/// </summary>
/// <param name="referenceSequence">Reference grouping for merging; MergeOverlaps is invoked on it.</param>
/// <param name="querySequence">Query grouping for merging; passed as the argument of MergeOverlaps.</param>
/// <returns>SequenceRangeGrouping with merged output.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="referenceSequence"/> is null.</exception>
public static SequenceRangeGrouping DoBEDMerge(SequenceRangeGrouping referenceSequence, SequenceRangeGrouping querySequence)
{
    // Fail with a named argument instead of a bare NullReferenceException on the call below.
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    return referenceSequence.MergeOverlaps(querySequence);
}
/// <summary>
/// Reads a Bed file into memory.
/// </summary>
/// <param name="filename">Path of the Bed file handed to BedParser.ParseRange.</param>
/// <returns>
/// A grouping built from the parsed ranges; when arguments.normalizeInputs is set,
/// the grouping returned by MergeOverlaps() is returned instead.
/// </returns>
public static SequenceRangeGrouping ReadBedFile(string filename)
{
    var parser = new BedParser();
    IList<ISequenceRange> listSequenceRange = parser.ParseRange(filename);

    if (verbose)
    {
        Console.Error.WriteLine("Processed File: {0}", filename);
        ListSequenceRangeToString(listSequenceRange);
    }

    var srg = new SequenceRangeGrouping(listSequenceRange);

    if (arguments.normalizeInputs)
    {
        // MergeOverlaps returns the merged grouping rather than modifying the receiver,
        // so the result has to be kept for the normalization to take effect.
        srg = srg.MergeOverlaps(); // could be called Normalize() or Canonicalize()
    }

    return srg;
}