/// <summary>
/// For each group in the grouping, this method traverses through each range
/// in the group and normalizes the ranges down to the minimal spanning set
/// required to still show the same range spans.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// -> 10 to 100
/// -> 200 to 250
/// -> 35 to 45
/// -> 90 to 150
///
/// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
/// For minOverlap = 0
///
/// -> 10 to 150
/// -> 200 to 250
///
/// for minOverlap = -50
///
/// -> 10 to 250
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="minOverlap">Minimum number of base pairs that must overlap. A following range is
/// merged into the current one while (current end - following start) is greater than or equal to
/// this value, so zero merges touching ranges and a negative value bridges gaps.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>A new SequenceRangeGrouping holding the merged ranges of every group.</returns>
public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();
    List<ISequenceRange> sortedRanges = new List<ISequenceRange>();

    foreach (List<ISequenceRange> rangeList in this._groups.Values)
    {
        // Work on a sorted copy so the group's own list is left untouched.
        sortedRanges.Clear();
        sortedRanges.AddRange(rangeList);
        sortedRanges.Sort();

        // Walk the sorted copy with a cursor instead of repeatedly removing the
        // head element; RemoveAt(0) shifts the whole list on every call.
        int index = 0;
        while (index < sortedRanges.Count)
        {
            ISequenceRange first = sortedRanges[index];
            index++;

            ISequenceRange seqRange = new SequenceRange(first.ID, first.Start, first.End);
            if (isParentSeqRangesRequired)
            {
                AddParent(seqRange, first);
            }

            seqRangeGroup.Add(seqRange);

            // Absorb every following range that overlaps the (growing) merged range
            // by at least minOverlap.
            while (index < sortedRanges.Count
                   && (seqRange.End - sortedRanges[index].Start) >= minOverlap)
            {
                ISequenceRange next = sortedRanges[index];
                index++;

                seqRange.End = Math.Max(seqRange.End, next.End);
                if (isParentSeqRangesRequired)
                {
                    AddParent(seqRange, next);
                }
            }
        }
    }

    return seqRangeGroup;
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
/// Ranges in this instance     Ranges in the query
/// 1 to 4                      2 to 6
/// 4 to 8                      3 to 6
/// 8 to 12                     9 to 14
/// 25 to 35
///
/// Result for minOverlap set to 1
/// 1. If outputType is IntervalsWithNoOverlap
///     25 to 35
///
/// 2. If outputType is NonOverlappingPiecesOfIntervals
///     1 to 2
///     6 to 8
///     8 to 9
///     25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping. Its ranges are merged with MergeOverlaps(0, ...) before
/// being compared with the ranges of this instance.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>A new SequenceRangeGrouping holding the result of the subtraction.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Pieces already emitted to the right of an earlier query range for the current
    // reference range; later query ranges trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // Remember the original (unmerged) query ranges so that parents can be attributed
    // to them, then merge the query sequence ranges.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in _groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(_groups[id]);

        if (query._groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query._groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();

                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing overlaps this reference range: copy it through unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // No need to proceed if only non overlapping intervals are needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // NOTE(review): the "same as previous query range" test below joins the two
                    // inequalities with &&, so a query range sharing only its start or only its
                    // end with the previous one is also treated as a repeat. Presumably this
                    // cannot happen for merged query ranges - confirm against GetOverlappingRenges.

                    // In case of non overlapping pieces of intervals get the non overlapping
                    // ranges from the reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // If the previous overlapping range's start and end are equal then
                            // there is no need to change the previously emitted pieces.
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Iterate backwards because entries may be removed.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        // Trim the piece so that it stops where this query range starts.
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Piece of the reference range to the left of the query range.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // Piece of the reference range to the right of the query range; it is
                            // remembered so that a later query range can trim it.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group: every reference range survives.
            // Iterate as ISequenceRange (like the branch above) so that implementations
            // other than SequenceRange do not cause an InvalidCastException.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// Ranges in this instance     Ranges in the query
/// 0 to 10                     20 to 40
/// 30 to 50                    70 to 100
/// 60 to 80                    400 to 800
/// 300 to 500                  850 to 900
/// 600 to 700                  900 to 1200
/// 800 to 1000
///
/// Result for minOverlap set to 1
/// 1. If outputType is OverlappingPiecesOfIntervals.
///     30 to 40
///     70 to 80
///     400 to 500
///     600 to 700
///     850 to 900
///     900 to 1000
/// 2. If outputType is OverlappingIntervals
///     30 to 50
///     60 to 80
///     300 to 500
///     600 to 700
///     800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping. Its ranges are merged with MergeOverlaps(0, ...) before
/// being compared with the ranges of this instance.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs that
/// overlap with query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>A new SequenceRangeGrouping holding the result of the intersection.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // Remember the original (unmerged) query ranges so that parents can be attributed
    // to them, then merge the query sequence ranges.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in _groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(_groups[id]);

        if (query._groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query._groups[id]);
            querySeqRanges.Sort();
        }

        // Groups without any query range contribute nothing to the intersection.
        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // If the minOverlap is less than or equal to zero and overlapping intervals
                    // are required, then add the ref seq to result.
                    if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }

                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                    {
                        // Add ref sequence only once for query ranges having same start and end.
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // The overlapping piece is the span shared by both ranges.
                            range = new SequenceRange(
                                refRange.ID,
                                Math.Max(queryRange.Start, refRange.Start),
                                Math.Min(queryRange.End, refRange.End));

                            result.Add(range);
                            CopyOfMetadata(range, refRange);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }
                    else
                    {
                        // Add ref sequence only once.
                        if (previousOverlappingRange == null)
                        {
                            range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                            else
                            {
                                // Without parent tracking the remaining query ranges add nothing.
                                break;
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
    }

    return result;
}