/// <summary>
/// Compares this sequence range with another object.
/// </summary>
/// <param name="obj">SequenceRange instance to compare.</param>
/// <returns>
/// Zero when <paramref name="obj"/> is null or is not a SequenceRange.
/// Otherwise the result of the typed CompareTo overload: if the Start values of the
/// two ranges are identical then the result is that of calling CompareTo() on the two
/// End values; if the Start values are not equal then the result is that of calling
/// CompareTo() on the two Start values.
/// </returns>
public int CompareTo(object obj)
{
    // "as" yields null both for a null argument and for an object of another type,
    // so one check on the cast result covers both cases. The previous code re-tested
    // obj after the cast, which let a foreign object forward null to the typed overload.
    SequenceRange sequenceRange = obj as SequenceRange;
    if (object.ReferenceEquals(sequenceRange, null))
    {
        return 0;
    }

    return CompareTo(sequenceRange);
}
/// <summary>
/// For each group in the grouping, this method traverses through each range
/// in the group and normalizes the ranges down to the minimal spanning set
/// required to still show the same range spans.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// -> 10 to 100
/// -> 200 to 250
/// -> 35 to 45
/// -> 90 to 150
///
/// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
/// For minOverlap = 0
///
/// -> 10 to 150
/// -> 200 to 250
///
/// for minOverlap = -50
///
/// -> 10 to 250
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="minOverlap">Minimum number of base pairs that must overlap for two ranges
/// to be merged. A negative value also merges ranges separated by a gap of up to that size.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The overlapped sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

    foreach (List<ISequenceRange> rangeList in this.groups.Values)
    {
        // Sort a private copy so the ranges stored in this grouping keep their order.
        List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
        sortedRanges.Sort();

        // Walk the sorted list with a cursor instead of repeatedly removing element 0,
        // which shifts the whole list on every call.
        int index = 0;
        while (index < sortedRanges.Count)
        {
            ISequenceRange seed = sortedRanges[index];
            index++;

            ISequenceRange seqRange = new SequenceRange(seed.ID, seed.Start, seed.End);
            if (isParentSeqRangesRequired)
            {
                AddParent(seqRange, seed);
            }

            // The merged range is added before it is extended; later End updates
            // are applied to the same instance.
            seqRangeGroup.Add(seqRange);

            // Absorb every following range whose start lies close enough to the current end.
            while (index < sortedRanges.Count
                && (seqRange.End - sortedRanges[index].Start) >= minOverlap)
            {
                ISequenceRange absorbed = sortedRanges[index];
                index++;

                seqRange.End = Math.Max(seqRange.End, absorbed.End);
                if (isParentSeqRangesRequired)
                {
                    AddParent(seqRange, absorbed);
                }
            }
        }
    }

    return seqRangeGroup;
}
/// <summary>
/// Convert given range containing any bed sequence to SequenceRange objects.
/// Hidden rows are skipped, and a row is emitted only when its ID, start and end
/// columns are all populated.
/// </summary>
/// <param name="userSelectedRange">Range of cells</param>
/// <param name="columnMapping">A dictionary which has a column number to BED header mapping.</param>
/// <returns>
/// A dictionary from each created SequenceRange to the address string produced by
/// GetRangeAddress for its row. Empty when the selection does not yield a
/// two-dimensional value array or when no columns are mapped.
/// </returns>
public static Dictionary<ISequenceRange, string> RangeToSequenceRange(Range userSelectedRange, Dictionary<int, string> columnMapping)
{
    Dictionary<ISequenceRange, string> sequenceRanges = new Dictionary<ISequenceRange, string>();

    // cellRange will be a base 1 array. Value2 is not an array for a single-cell
    // selection; such a selection cannot hold ID, start and end, so nothing is produced.
    object[,] cellRange = userSelectedRange.Value2 as object[,];
    if (cellRange == null || columnMapping.Count == 0)
    {
        return sequenceRanges;
    }

    int rangeWidth = columnMapping.Keys.Max() - columnMapping.Keys.Min() + 1;
    SortedSet<int> sortedColumn = new SortedSet<int>(columnMapping.Keys);

    int rowCount = cellRange.GetLength(0);
    int columnCount = cellRange.GetLength(1);

    // Resolve the mapped header of every selected column once. The lookup goes through
    // the worksheet object model and does not depend on the row being processed.
    bool[] isMapped = new bool[columnCount + 1];
    string[] mappedHeader = new string[columnCount + 1];
    for (int col = 1; col <= columnCount; col++)
    {
        string header;
        if (columnMapping.TryGetValue((userSelectedRange[1, col] as Range).Column, out header))
        {
            isMapped[col] = true;
            mappedHeader[col] = header;
        }
    }

    for (int row = 1; row <= rowCount; row++)
    {
        // If hidden row, then skip
        if ((userSelectedRange.Rows[row] as Range).EntireRow.Hidden)
        {
            continue;
        }

        bool idFound = false;
        bool startFound = false;
        bool endFound = false;
        SequenceRange sequenceRange = new SequenceRange();

        for (int col = 1; col <= columnCount; col++)
        {
            if (cellRange[row, col] == null || !isMapped[col])
            {
                continue;
            }

            string cellValue = cellRange[row, col].ToString();
            string columnName = mappedHeader[col];

            switch (columnName) // mapped header for current column
            {
                case CHROM_ID:
                    sequenceRange.ID = cellValue;
                    idFound = true;
                    break;
                case CHROM_START:
                    sequenceRange.Start = long.Parse(cellValue, CultureInfo.InvariantCulture);
                    startFound = true;
                    break;
                case CHROM_END:
                    sequenceRange.End = long.Parse(cellValue, CultureInfo.InvariantCulture);
                    endFound = true;
                    break;
                default:
                    // any other item goes into the metadata dictionary
                    sequenceRange.Metadata.Add(columnName, cellValue);
                    break;
            }
        }

        // Rows missing any of the three mandatory fields are ignored.
        if (idFound && startFound && endFound)
        {
            string rangeAddress = GetRangeAddress(userSelectedRange, sortedColumn, rangeWidth, row);
            sequenceRanges.Add(sequenceRange, rangeAddress);
        }
    }

    return sequenceRanges;
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
/// Ranges in this instance   Ranges in the query
///  1 to 4                    2 to 6
///  4 to 8                    3 to 6
///  8 to 12                   9 to 14
///  25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///       25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals that is non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Declared at method scope on purpose: the parent bookkeeping below may refer to
    // the range created for an earlier query range.
    SequenceRange range = null;

    // Remember the caller's original query ranges before merging, so that a merged range
    // equal to an original one can be recorded as a parent directly.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing to subtract from this reference range: copy it through unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Trim the right-hand leftovers created for earlier query ranges so they
                                // stop where this query range starts. The instances are already in the
                                // result, so they are adjusted in place.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            // The trimming loop above can remove every tracked piece, so only
                            // index element 0 when something is left to attach parents to.
                            if (isParentSeqRangesRequired && previousSeqRanges.Count > 0)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Piece of the reference range to the left of the query range.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);
                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // Piece of the reference range to the right of the query range; it is
                            // tracked so a later query range can trim it.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group: every reference range survives as is.
            // Iterate as ISequenceRange; naming the concrete class here would insert a
            // hidden cast that fails for any other ISequenceRange implementation.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);
                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// Ranges in this instance   Ranges in the query
///  0 to 10                   20 to 40
///  30 to 50                  70 to 100
///  60 to 80                  400 to 800
///  300 to 500                850 to 900
///  600 to 700                900 to 1200
///  800 to 1000
///
/// Result for minOverlap set to 1
///  1. If outputType is OverlappingPiecesOfIntervals.
///       30 to 40
///       70 to 80
///       400 to 500
///       600 to 700
///       850 to 900
///       900 to 1000
///  2. If outputType is OverlappingIntervals
///       30 to 50
///       60 to 80
///       300 to 500
///       600 to 700
///       800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals that is only the base pairs that overlap with
/// query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping intersection = new SequenceRangeGrouping();

    // Kept at method scope: parent bookkeeping may refer to the range emitted
    // for an earlier query range.
    SequenceRange emitted = null;

    // Snapshot of the caller's ranges, taken before the query is merged.
    IList<ISequenceRange> originalQueryRanges = null;
    if (isParentSeqRangesRequired)
    {
        originalQueryRanges = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;

    foreach (string id in groups.Keys)
    {
        List<ISequenceRange> referenceRanges = new List<ISequenceRange>(groups[id]);

        // A group without any query ranges contributes nothing to the result.
        if (!query.groups.ContainsKey(id))
        {
            continue;
        }

        List<ISequenceRange> sortedQueryRanges = new List<ISequenceRange>(query.groups[id]);
        sortedQueryRanges.Sort();
        if (sortedQueryRanges.Count == 0)
        {
            continue;
        }

        foreach (ISequenceRange refRange in referenceRanges)
        {
            IList<ISequenceRange> hits = GetOverlappingRenges(refRange, sortedQueryRanges, minOverlap);

            if (hits == null || hits.Count == 0)
            {
                // With a non-positive minOverlap and whole intervals requested,
                // the reference range is still reported.
                if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                {
                    emitted = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                    CopyOfMetadata(emitted, refRange);
                    intersection.Add(emitted);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(emitted, refRange);
                    }
                }

                continue;
            }

            ISequenceRange previousHit = null;
            foreach (ISequenceRange queryRange in hits)
            {
                bool startsNewRange;

                if (piecesRequested)
                {
                    // One clipped piece per query range, unless it repeats the previous one.
                    startsNewRange = previousHit == null
                        || (previousHit.Start != queryRange.Start && previousHit.End != queryRange.End);

                    if (startsNewRange)
                    {
                        long pieceStart = Math.Max(queryRange.Start, refRange.Start);
                        long pieceEnd = Math.Min(queryRange.End, refRange.End);
                        emitted = new SequenceRange(refRange.ID, pieceStart, pieceEnd);
                        intersection.Add(emitted);
                        CopyOfMetadata(emitted, refRange);
                    }
                }
                else
                {
                    // The whole reference range is reported once, on the first hit.
                    startsNewRange = previousHit == null;

                    if (startsNewRange)
                    {
                        emitted = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(emitted, refRange);
                        intersection.Add(emitted);
                    }
                }

                if (startsNewRange)
                {
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(emitted, refRange);
                    }
                    else if (!piecesRequested)
                    {
                        // No parents to collect, so the remaining hits add nothing.
                        break;
                    }
                }

                if (isParentSeqRangesRequired)
                {
                    if (originalQueryRanges.Contains(queryRange))
                    {
                        AddParent(emitted, queryRange);
                    }
                    else if (queryRange.ParentSeqRanges.Count > 0)
                    {
                        AddParent(emitted, queryRange.ParentSeqRanges);
                    }
                }

                previousHit = queryRange;
            }
        }
    }

    return intersection;
}
/// <summary>
/// For each group in the grouping, this method traverses through each range
/// in the group and normalizes the ranges down to the minimal spanning set
/// required to still show the same range spans.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// -> 10 to 100
/// -> 200 to 250
/// -> 35 to 45
/// -> 90 to 150
///
/// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
/// For minOverlap = 0
///
/// -> 10 to 150
/// -> 200 to 250
///
/// for minOverlap = -50
///
/// -> 10 to 250
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="minOverlap">Minimum number of base pairs that must overlap for two ranges
/// to be merged. A negative value also merges ranges separated by a gap of up to that size.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The overlapped sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

    foreach (List<ISequenceRange> rangeList in this.groups.Values)
    {
        // Sort a private copy so the ranges stored in this grouping keep their order.
        List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
        sortedRanges.Sort();

        // Walk the sorted list with a cursor instead of repeatedly removing element 0,
        // which shifts the whole list on every call.
        int index = 0;
        while (index < sortedRanges.Count)
        {
            ISequenceRange seed = sortedRanges[index];
            index++;

            ISequenceRange seqRange = new SequenceRange(seed.ID, seed.Start, seed.End);
            if (isParentSeqRangesRequired)
            {
                AddParent(seqRange, seed);
            }

            // The merged range is added before it is extended; later End updates
            // are applied to the same instance.
            seqRangeGroup.Add(seqRange);

            // Absorb every following range whose start lies close enough to the current end.
            while (index < sortedRanges.Count
                && (seqRange.End - sortedRanges[index].Start) >= minOverlap)
            {
                ISequenceRange absorbed = sortedRanges[index];
                index++;

                seqRange.End = Math.Max(seqRange.End, absorbed.End);
                if (isParentSeqRangesRequired)
                {
                    AddParent(seqRange, absorbed);
                }
            }
        }
    }

    return seqRangeGroup;
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
/// Ranges in this instance   Ranges in the query
///  1 to 4                    2 to 6
///  4 to 8                    3 to 6
///  8 to 12                   9 to 14
///  25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///       25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals that is non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Declared at method scope on purpose: the parent bookkeeping below may refer to
    // the range created for an earlier query range.
    SequenceRange range = null;

    // Remember the caller's original query ranges before merging, so that a merged range
    // equal to an original one can be recorded as a parent directly.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing to subtract from this reference range: copy it through unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Trim the right-hand leftovers created for earlier query ranges so they
                                // stop where this query range starts. The instances are already in the
                                // result, so they are adjusted in place.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            // The trimming loop above can remove every tracked piece, so only
                            // index element 0 when something is left to attach parents to.
                            if (isParentSeqRangesRequired && previousSeqRanges.Count > 0)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Piece of the reference range to the left of the query range.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);
                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // Piece of the reference range to the right of the query range; it is
                            // tracked so a later query range can trim it.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group: every reference range survives as is.
            // Iterate as ISequenceRange; naming the concrete class here would insert a
            // hidden cast that fails for any other ISequenceRange implementation.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);
                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
/// Ranges in this instance   Ranges in the query
///  0 to 10                   20 to 40
///  30 to 50                  70 to 100
///  60 to 80                  400 to 800
///  300 to 500                850 to 900
///  600 to 700                900 to 1200
///  800 to 1000
///
/// Result for minOverlap set to 1
///  1. If outputType is OverlappingPiecesOfIntervals.
///       30 to 40
///       70 to 80
///       400 to 500
///       600 to 700
///       850 to 900
///       900 to 1000
///  2. If outputType is OverlappingIntervals
///       30 to 50
///       60 to 80
///       300 to 500
///       600 to 700
///       800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals that is only the base pairs that overlap with
/// query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping intersection = new SequenceRangeGrouping();

    // Kept at method scope: parent bookkeeping may refer to the range emitted
    // for an earlier query range.
    SequenceRange emitted = null;

    // Snapshot of the caller's ranges, taken before the query is merged.
    IList<ISequenceRange> originalQueryRanges = null;
    if (isParentSeqRangesRequired)
    {
        originalQueryRanges = query.Flatten();
    }

    // merge the query sequence ranges.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;

    foreach (string id in groups.Keys)
    {
        List<ISequenceRange> referenceRanges = new List<ISequenceRange>(groups[id]);

        // A group without any query ranges contributes nothing to the result.
        if (!query.groups.ContainsKey(id))
        {
            continue;
        }

        List<ISequenceRange> sortedQueryRanges = new List<ISequenceRange>(query.groups[id]);
        sortedQueryRanges.Sort();
        if (sortedQueryRanges.Count == 0)
        {
            continue;
        }

        foreach (ISequenceRange refRange in referenceRanges)
        {
            IList<ISequenceRange> hits = GetOverlappingRenges(refRange, sortedQueryRanges, minOverlap);

            if (hits == null || hits.Count == 0)
            {
                // With a non-positive minOverlap and whole intervals requested,
                // the reference range is still reported.
                if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                {
                    emitted = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                    CopyOfMetadata(emitted, refRange);
                    intersection.Add(emitted);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(emitted, refRange);
                    }
                }

                continue;
            }

            ISequenceRange previousHit = null;
            foreach (ISequenceRange queryRange in hits)
            {
                bool startsNewRange;

                if (piecesRequested)
                {
                    // One clipped piece per query range, unless it repeats the previous one.
                    startsNewRange = previousHit == null
                        || (previousHit.Start != queryRange.Start && previousHit.End != queryRange.End);

                    if (startsNewRange)
                    {
                        long pieceStart = Math.Max(queryRange.Start, refRange.Start);
                        long pieceEnd = Math.Min(queryRange.End, refRange.End);
                        emitted = new SequenceRange(refRange.ID, pieceStart, pieceEnd);
                        intersection.Add(emitted);
                        CopyOfMetadata(emitted, refRange);
                    }
                }
                else
                {
                    // The whole reference range is reported once, on the first hit.
                    startsNewRange = previousHit == null;

                    if (startsNewRange)
                    {
                        emitted = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(emitted, refRange);
                        intersection.Add(emitted);
                    }
                }

                if (startsNewRange)
                {
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(emitted, refRange);
                    }
                    else if (!piecesRequested)
                    {
                        // No parents to collect, so the remaining hits add nothing.
                        break;
                    }
                }

                if (isParentSeqRangesRequired)
                {
                    if (originalQueryRanges.Contains(queryRange))
                    {
                        AddParent(emitted, queryRange);
                    }
                    else if (queryRange.ParentSeqRanges.Count > 0)
                    {
                        AddParent(emitted, queryRange.ParentSeqRanges);
                    }
                }

                previousHit = queryRange;
            }
        }
    }

    return intersection;
}
/// <summary>
/// Identify hot spot chromosomes for length anomaly regions.
/// Parses the alignment file, prints the regions of length-anomaly reads and of
/// orphan reads to the console, and then prints the intersection of the two.
/// </summary>
/// <param name="filename">Input file; parsed as SAM when SAMInput is set, otherwise as BAM.</param>
/// <param name="mean">Mean value. When this or <paramref name="deviation"/> is -1 the
/// parameterless GetPairedReads() is used instead of GetPairedReads(mean, deviation).</param>
/// <param name="deviation">Standard deviation.</param>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // -1 is the "not supplied" sentinel for both values.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    SequenceAlignmentMap alignmentMapobj = null;
    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // get reads from sequence alignment map object.
    IList<PairedRead> pairedReads = null;
    if (calculateMeanNdeviation)
    {
        pairedReads = alignmentMapobj.GetPairedReads();
    }
    else
    {
        pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
    }

    // Get the orphan regions. The filter is materialised once so that counting and
    // iterating do not each re-scan the paired reads.
    List<PairedRead> orphans = pairedReads
        .Where(pairedRead => pairedRead.PairedType == PairedReadType.Orphan)
        .ToList();
    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get sequence range grouping for Orphan regions.
    SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

    // Get the Length anomalies regions.
    List<PairedRead> lengthAnomalies = pairedReads
        .Where(pairedRead => pairedRead.PairedType == PairedReadType.LengthAnomaly)
        .ToList();
    if (lengthAnomalies.Count == 0)
    {
        Console.WriteLine("No Anomalies to display");
    }

    List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
    foreach (PairedRead laRead in lengthAnomalies)
    {
        // The region starts at the first read's position and spans the insert length.
        SequenceRange range = new SequenceRange();
        range.ID = laRead.Read1.RName;
        range.Start = laRead.Read1.Pos;
        range.End = laRead.Read1.Pos + laRead.InsertLength;
        lengthAnomalyRegions.Add(range);
    }

    // Get sequence range grouping for length anomaly regions.
    SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);

    if (lengthAnomalyRangegroup.GroupIDs.Count() == 0)
    {
        Console.Write("\r\nNo Length anomalies reads to display");
    }
    else
    {
        Console.Write("Region of length anomaly:");
        DisplaySequenceRange(lengthAnomalyRangegroup);
    }

    if (orphanRangegroup.GroupIDs.Count() == 0)
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("\r\nRegion of Orphan reads:");
        DisplaySequenceRange(orphanRangegroup);
    }

    // Hot spots are the places where length-anomaly regions and orphan regions overlap.
    SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
    if (intersectedRegions.GroupIDs.Count() == 0)
    {
        Console.Write("\r\nNo Hot spots found");
    }
    else
    {
        Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
        DisplaySequenceRange(intersectedRegions);
    }
}