/// <summary>
/// Collects the query ranges that intersect the reference range and reports them only
/// when their combined overlap length reaches the requested minimum.
/// IDs are not compared here; the intersect and subtract operations rely on this helper.
/// </summary>
/// <param name="refSeqRange">Reference range the query ranges are tested against.</param>
/// <param name="querySeqRanges">Candidate query ranges.</param>
/// <param name="minimalOverlap">Required total overlap; zero or negative values are treated as 1.</param>
/// <returns>
/// The intersecting query ranges, or null when nothing intersects or the summed
/// overlap is smaller than the required minimum.
/// </returns>
private List<ISequenceRange> GetOverlappingRenges(ISequenceRange refSeqRange, List<ISequenceRange> querySeqRanges, long minimalOverlap)
{
    long requiredOverlap = minimalOverlap <= 0 ? 1 : minimalOverlap;
    long overlapSum = 0;
    List<ISequenceRange> hits = new List<ISequenceRange>();

    foreach (ISequenceRange candidate in querySeqRanges)
    {
        // Skip candidates lying entirely after or entirely before the reference range.
        if (candidate.Start > refSeqRange.End || candidate.End < refSeqRange.Start)
        {
            continue;
        }

        long sharedEnd = Math.Min(candidate.End, refSeqRange.End);
        long sharedStart = Math.Max(candidate.Start, refSeqRange.Start);
        overlapSum += sharedEnd - sharedStart;
        hits.Add(candidate);
    }

    if (hits.Count == 0 || overlapSum < requiredOverlap)
    {
        return null;
    }

    return hits;
}
/// <summary>
/// Registers a parent range on the given range by appending it to the range's
/// ParentSeqRanges collection, unless that collection already contains it.
/// </summary>
/// <param name="range">Range whose ParentSeqRanges collection is updated.</param>
/// <param name="parentRange">Parent range to register.</param>
private void AddParent(ISequenceRange range, ISequenceRange parentRange)
{
    bool alreadyRegistered = range.ParentSeqRanges.Contains(parentRange);
    if (alreadyRegistered)
    {
        return;
    }

    range.ParentSeqRanges.Add(parentRange);
}
/// <summary>
/// Registers every range in <paramref name="parentRanges"/> as a parent of
/// <paramref name="range"/>, delegating each one to the single-parent overload.
/// </summary>
/// <param name="range">Range that receives the parents.</param>
/// <param name="parentRanges">Parent ranges to register, processed in list order.</param>
private void AddParent(ISequenceRange range, IList<ISequenceRange> parentRanges)
{
    foreach (ISequenceRange candidateParent in parentRanges)
    {
        AddParent(range, candidateParent);
    }
}
/// <summary>
/// Registers every range in <paramref name="parentRanges"/> as a parent of
/// <paramref name="range"/>, delegating each one to the single-parent overload.
/// </summary>
/// <param name="range">Range that receives the parents.</param>
/// <param name="parentRanges">Parent ranges to register, processed in enumeration order.</param>
private static void AddParent(ISequenceRange range, IEnumerable<ISequenceRange> parentRanges)
{
    foreach (ISequenceRange candidateParent in parentRanges)
    {
        AddParent(range, candidateParent);
    }
}
/// <summary>
/// Compares this sequence range with another one.
/// </summary>
/// <param name="other">SequenceRange instance to compare.</param>
/// <returns>
/// A positive value when <paramref name="other"/> is null (an instance always sorts after null).
/// Otherwise the first non-zero result of comparing, in this order: the Start values,
/// the End values, the IDs (ordinal, ignoring case), the number of parent ranges and
/// finally the parent ranges pairwise by position. Zero when all of them are equal.
/// </returns>
public int CompareTo(ISequenceRange other)
{
    // IComparable<T> contract: any instance compares greater than null.
    // Without this guard the first property access below throws NullReferenceException.
    if (other == null)
    {
        return 1;
    }

    int compare = Start.CompareTo(other.Start);
    if (compare != 0)
    {
        return compare;
    }

    compare = End.CompareTo(other.End);
    if (compare != 0)
    {
        return compare;
    }

    compare = string.Compare(ID, other.ID, StringComparison.OrdinalIgnoreCase);
    if (compare != 0)
    {
        return compare;
    }

    compare = ParentSeqRanges.Count.CompareTo(other.ParentSeqRanges.Count);
    if (compare != 0)
    {
        return compare;
    }

    // Same number of parents on both sides: the first differing pair decides.
    for (int index = 0; index < ParentSeqRanges.Count; index++)
    {
        compare = ParentSeqRanges[index].CompareTo(other.ParentSeqRanges[index]);
        if (compare != 0)
        {
            return compare;
        }
    }

    return 0;
}
/// <summary>
/// Replaces the metadata of <paramref name="toRange"/> with the entries of
/// <paramref name="fromRange"/>. The target is cleared first; values are assigned
/// as-is, so both ranges end up referring to the same value objects.
/// </summary>
/// <param name="toRange">Range that receives the metadata.</param>
/// <param name="fromRange">Range the metadata is read from.</param>
private static void CopyOfMetadata(ISequenceRange toRange, ISequenceRange fromRange)
{
    toRange.Metadata.Clear();

    if (fromRange.Metadata.Count == 0)
    {
        return;
    }

    foreach (string metadataKey in fromRange.Metadata.Keys)
    {
        toRange.Metadata[metadataKey] = fromRange.Metadata[metadataKey];
    }
}
/// <summary>
/// Puts a range into the bucket that matches its ID. A bucket is created the first
/// time an ID is seen; later ranges with the same ID are appended to that bucket.
/// A null range is ignored.
/// </summary>
/// <param name="range">The range item to add to the grouping.</param>
/// <exception cref="ArgumentException">The range has a null ID.</exception>
public void Add(ISequenceRange range)
{
    if (range == null)
    {
        return;
    }

    if (range.ID == null)
    {
        throw new ArgumentException("Can not group a SequenceRange that has no ID");
    }

    if (!groups.ContainsKey(range.ID))
    {
        // First range for this ID: start a new bucket that already holds it.
        groups[range.ID] = new List<ISequenceRange> { range };
        return;
    }

    groups[range.ID].Add(range);
}
/// <summary>
/// Replaces the metadata of <paramref name="toRange"/> with the entries of
/// <paramref name="fromRange"/>. The target is cleared first. Values implementing
/// ICloneable are stored as clones; all other values are stored as-is.
/// </summary>
/// <param name="toRange">Range that receives the metadata.</param>
/// <param name="fromRange">Range the metadata is read from.</param>
private void CopyOfMetadata(ISequenceRange toRange, ISequenceRange fromRange)
{
    toRange.Metadata.Clear();

    if (fromRange.Metadata.Count == 0)
    {
        return;
    }

    foreach (string metadataKey in fromRange.Metadata.Keys)
    {
        object sourceValue = fromRange.Metadata[metadataKey];
        ICloneable cloneSource = sourceValue as ICloneable;

        toRange.Metadata[metadataKey] = cloneSource != null
            ? cloneSource.Clone()
            : sourceValue;
    }
}
/// <summary>
/// Puts a range into the bucket that matches its ID. A bucket is created the first
/// time an ID is seen; later ranges with the same ID are appended to that bucket.
/// A null range is ignored.
/// </summary>
/// <param name="range">The range item to add to the grouping.</param>
/// <exception cref="ArgumentException">The range has a null ID.</exception>
public void Add(ISequenceRange range)
{
    if (range == null)
    {
        return;
    }

    if (range.ID == null)
    {
        throw new ArgumentException("Can not group a SequenceRange that has no ID");
    }

    bool bucketExists = _groups.ContainsKey(range.ID);
    if (bucketExists)
    {
        _groups[range.ID].Add(range);
        return;
    }

    // Unknown ID so far: open a bucket seeded with this range.
    List<ISequenceRange> bucket = new List<ISequenceRange> { range };
    _groups[range.ID] = bucket;
}
/// <summary>
/// Walks <paramref name="parentRanges"/> and hands each element to the single-parent
/// AddParent overload so it is registered as a parent of <paramref name="range"/>.
/// </summary>
/// <param name="range">Range that receives the parents.</param>
/// <param name="parentRanges">Parent ranges to register, processed in enumeration order.</param>
private static void AddParent(ISequenceRange range, IEnumerable<ISequenceRange> parentRanges)
{
    foreach (ISequenceRange parent in parentRanges)
    {
        AddParent(range, parent);
    }
}
/// <summary>
/// Compares this sequence range with another one.
/// </summary>
/// <param name="other">SequenceRange instance to compare.</param>
/// <returns>
/// A positive value when <paramref name="other"/> is null (an instance always sorts after null).
/// Otherwise the first non-zero result of comparing, in this order: the Start values,
/// the End values, the IDs (ordinal, ignoring case), the number of parent ranges and
/// finally the parent ranges pairwise by position. Zero when all of them are equal.
/// </returns>
public int CompareTo(ISequenceRange other)
{
    // IComparable<T> contract: any instance compares greater than null. Returning a
    // negative value here would disagree with Comparer<T>.Default, which orders null first.
    if (other == null)
    {
        return 1;
    }

    int compare = Start.CompareTo(other.Start);

    if (compare == 0)
    {
        compare = End.CompareTo(other.End);
    }

    if (compare == 0)
    {
        compare = string.Compare(ID, other.ID, StringComparison.OrdinalIgnoreCase);
    }

    if (compare == 0)
    {
        compare = ParentSeqRanges.Count.CompareTo(other.ParentSeqRanges.Count);

        if (compare == 0)
        {
            // Same number of parents on both sides: the first differing pair decides.
            for (int index = 0; index < ParentSeqRanges.Count; index++)
            {
                compare = ParentSeqRanges[index].CompareTo(other.ParentSeqRanges[index]);
                if (compare != 0)
                {
                    break;
                }
            }
        }
    }

    return compare;
}
/// <summary>
/// Builds the output row for one result range by running PrepareSequenceRangeRow
/// for each of its parent ranges and collecting what those calls add.
/// </summary>
/// <param name="groupsData">
/// Complete input groups information.
/// Contains individual Group, sheet and addresses of ISequenceRange.
/// </param>
/// <param name="groupSheetIndices">
/// Complete indices.
/// Contains individual column of each sheet of each Group.
/// </param>
/// <param name="rangedata">Sequence and address list.</param>
/// <param name="resultSequenceRange">Result range whose parent ranges are processed.</param>
/// <returns>
/// A newly created dictionary filled by the PrepareSequenceRangeRow calls; it is empty
/// when the result range has no parent ranges.
/// </returns>
private static Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>> PrepareSequenceRowRange(
    Dictionary<SequenceRangeGrouping, GroupData> groupsData,
    Dictionary<SequenceRangeGrouping, Dictionary<string, int>> groupSheetIndices,
    Dictionary<ISequenceRange, string> rangedata,
    ISequenceRange resultSequenceRange)
{
    var preparedColumns = new Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>>();

    foreach (ISequenceRange parent in resultSequenceRange.ParentSeqRanges)
    {
        PrepareSequenceRangeRow(groupsData, groupSheetIndices, rangedata, preparedColumns, parent);
    }

    return preparedColumns;
}
/// <summary>
/// Files <paramref name="parentRange"/> under the column of the sheet it came from.
/// When the range does not belong to any input group, its own parent ranges are
/// processed recursively instead.
/// </summary>
/// <param name="groupsData">
/// Complete input groups information.
/// Contains individual Group, sheet and addresses of ISequenceRange.
/// </param>
/// <param name="groupSheetIndices">
/// Complete indices.
/// Contains individual column of each sheet of each Group.
/// </param>
/// <param name="rangedata">Sequence and address list; must contain an address for a range that belongs to a group.</param>
/// <param name="columnData">Data ready for output, keyed by sheet index; updated in place.</param>
/// <param name="parentRange">Query region that has to be prepared.</param>
private static void PrepareSequenceRangeRow(
    Dictionary<SequenceRangeGrouping, GroupData> groupsData,
    Dictionary<SequenceRangeGrouping, Dictionary<string, int>> groupSheetIndices,
    Dictionary<ISequenceRange, string> rangedata,
    Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>> columnData,
    ISequenceRange parentRange)
{
    // Find the first input group containing this range with a single pass over the keys
    // (dictionary keys are never null, so null here means "no group matched").
    SequenceRangeGrouping group = groupsData.Keys.FirstOrDefault(s => s.GroupRanges.Contains(parentRange));

    if (group == null)
    {
        // Not an input range itself: walk up to the ranges it was derived from.
        foreach (ISequenceRange grandParent in parentRange.ParentSeqRanges)
        {
            PrepareSequenceRangeRow(groupsData, groupSheetIndices, rangedata, columnData, grandParent);
        }

        return;
    }

    Dictionary<string, int> sheetIndices;
    if (!groupSheetIndices.TryGetValue(group, out sheetIndices))
    {
        return;
    }

    // Read the sheet name from the address (everything before the first '!').
    // The static Regex.Match overload reuses the framework's cached parsed pattern
    // instead of constructing a new Regex on every (recursive) call.
    string sheetName = string.Empty;
    Match matchSheetname = Regex.Match(rangedata[parentRange], @"(?<Sheetname>^.[^!]*)", RegexOptions.IgnoreCase);
    if (matchSheetname.Success)
    {
        sheetName = matchSheetname.Groups["Sheetname"].Value;
    }

    int sheetIndex;
    if (!sheetIndices.TryGetValue(sheetName, out sheetIndex))
    {
        return;
    }

    Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>> parentType;
    List<ISequenceRange> parentRanges;

    if (columnData.TryGetValue(sheetIndex, out parentType))
    {
        parentRanges = parentType.Item3;
    }
    else
    {
        parentRanges = new List<ISequenceRange>();

        // The flag records whether the group is the first input group (ref / query distinction).
        // It is only needed when a new column entry is created, so compute it here.
        bool isFirstGroup = groupsData.Keys.ToList().IndexOf(group) == 0;

        parentType = new Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>(
            group,
            isFirstGroup,
            parentRanges);
        columnData.Add(sheetIndex, parentType);
    }

    parentRanges.Add(parentRange);
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///   Ranges in this instance    Ranges in the query
///   1 to 4                     2 to 6
///   4 to 8                     3 to 6
///   8 to 12                    9 to 14
///   25 to 35
///
/// Result for minOverlap set to 1
///   1. If outputType is IntervalsWithNoOverlap
///        25 to 35
///
///   2. If outputType is NonOverlappingPiecesOfIntervals
///        1 to 2
///        6 to 8
///        8 to 9
///        25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, the non overlapping
/// pieces of intervals along with the non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>A new SequenceRangeGrouping holding the newly created result ranges.</returns>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // Remember the original (unmerged) query ranges so that parents can be attributed
    // to them, then merge the query sequence ranges.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in _groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(_groups[id]);

        if (query._groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query._groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                // Trailing pieces created for this reference range so far.
                previousSeqRanges.Clear();

                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing to subtract: the reference range is copied to the result unchanged.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // No need to proceed if only non overlapping intervals are needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // NOTE(review): the three "previousOverlappingRange" checks below skip the work only
                    // described by the comments as "same start and end", yet the condition requires BOTH
                    // start and end to differ (&&) before doing the work. Left unchanged; confirm intent.

                    // In case of non overlapping pieces of intervals get the non overlapping
                    // ranges from the reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // If the previous overlapping range's start and end are equal then there is
                            // no need to change the previously created pieces.
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                // Trim pieces reaching into this query range; drop pieces ending before it.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                // Piece of the reference range in front of the query range.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                        {
                            // Piece of the reference range behind the query range; it may be trimmed
                            // later by a following query range, so it is remembered in previousSeqRanges.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // No query ranges for this ID: every reference range is copied to the result.
            // Iterate as ISequenceRange (like the loop above); iterating as SequenceRange would
            // insert a hidden cast that throws InvalidCastException for other implementations.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Returns overlapping sequence ranges from this and the specified SequenceRangeGrouping
/// for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///   Ranges in this instance    Ranges in the query
///   0 to 10                    20 to 40
///   30 to 50                   70 to 100
///   60 to 80                   400 to 800
///   300 to 500                 850 to 900
///   600 to 700                 900 to 1200
///   800 to 1000
///
/// Result for minOverlap set to 1
///   1. If outputType is OverlappingPiecesOfIntervals
///        30 to 40
///        70 to 80
///        400 to 500
///        600 to 700
///        850 to 900
///        900 to 1000
///   2. If outputType is OverlappingIntervals
///        30 to 50
///        60 to 80
///        300 to 500
///        600 to 700
///        800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that should overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs that
/// overlap with query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>A new SequenceRangeGrouping holding the newly created result ranges.</returns>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List <ISequenceRange> refSeqRanges = new List <ISequenceRange>();
    List <ISequenceRange> querySeqRanges = new List <ISequenceRange>();
    SequenceRange range = null;

    // Remember the original (unmerged) query ranges so that parents can be attributed
    // to them, then merge the query sequence ranges.
    IList <ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in _groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(_groups[id]);

        if (query._groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query._groups[id]);
            querySeqRanges.Sort();
        }

        // IDs without any query range contribute nothing to the result.
        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                IList <ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // If the minOverlap is less than or equal to zero and overlapping intervals
                    // are required, then add the ref seq to the result.
                    if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }

                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                    {
                        // Add ref sequence only once for query ranges having same start and end.
                        // NOTE(review): as written, a new piece is created only when BOTH start and end
                        // differ from the previous query range (&&) - confirm this matches the intent.
                        if (previousOverlappingRange == null || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // The piece is the part shared by the reference and the query range.
                            range = new SequenceRange(
                                refRange.ID,
                                Math.Max(queryRange.Start, refRange.Start),
                                Math.Min(queryRange.End, refRange.End));

                            result.Add(range);
                            CopyOfMetadata(range, refRange);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        // 'range' is the piece created above or, when none was created in this
                        // iteration, the most recently created one.
                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }
                    else
                    {
                        // Add ref sequence only once.
                        if (previousOverlappingRange == null)
                        {
                            range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                            else
                            {
                                // Without parent tracking the remaining query ranges add nothing
                                // for this reference range, so stop looking at them.
                                break;
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
    }

    return(result);
}
/// <summary>
/// Looks up a single metadata value on a SequenceRange object.
/// </summary>
/// <param name="range">Range object whose metadata has to be extracted.</param>
/// <param name="key">Key for the metadata.</param>
/// <returns>
/// The value stored under <paramref name="key"/>, or null when the range or its metadata
/// is null, the key is null or empty, or the metadata has no entry for the key.
/// </returns>
private static object ExtractRangeMetadata(ISequenceRange range, string key)
{
    if (range == null || range.Metadata == null)
    {
        return null;
    }

    if (string.IsNullOrEmpty(key) || !range.Metadata.ContainsKey(key))
    {
        return null;
    }

    return range.Metadata[key];
}
/// <summary>
/// Gathers the query ranges that intersect the reference range and hands them back only
/// if the sum of their overlap lengths reaches the requested minimum.
/// IDs are not checked here; the intersect and subtract operations rely on this helper.
/// </summary>
/// <param name="refSeqRange">Reference seq range.</param>
/// <param name="querySeqRanges">Query sequence ranges.</param>
/// <param name="minimalOverlap">Minimum overlap required; zero or negative values are treated as 1.</param>
/// <returns>
/// Overlapping ranges from the query ranges, or null when there are none or their
/// summed overlap is below the minimum.
/// </returns>
private static List<ISequenceRange> GetOverlappingRenges(ISequenceRange refSeqRange, List<ISequenceRange> querySeqRanges, long minimalOverlap)
{
    if (minimalOverlap <= 0)
    {
        minimalOverlap = 1;
    }

    List<ISequenceRange> overlapping = new List<ISequenceRange>();
    long accumulatedOverlap = 0;

    foreach (ISequenceRange current in querySeqRanges)
    {
        bool intersects = current.Start <= refSeqRange.End && current.End >= refSeqRange.Start;
        if (!intersects)
        {
            continue;
        }

        long upperBound = Math.Min(current.End, refSeqRange.End);
        long lowerBound = Math.Max(current.Start, refSeqRange.Start);
        accumulatedOverlap = accumulatedOverlap + upperBound - lowerBound;
        overlapping.Add(current);
    }

    bool satisfied = overlapping.Count > 0 && accumulatedOverlap >= minimalOverlap;
    return satisfied ? overlapping : null;
}
/// <summary>
/// Appends <paramref name="parentRange"/> to the ParentSeqRanges collection of
/// <paramref name="range"/>; does nothing when the collection already contains it.
/// </summary>
/// <param name="range">ISequenceRange instance whose ParentSeqRanges collection is updated.</param>
/// <param name="parentRange">Parent range of the specified range.</param>
private static void AddParent(ISequenceRange range, ISequenceRange parentRange)
{
    if (range.ParentSeqRanges.Contains(parentRange))
    {
        // Already registered as a parent; keep the collection free of duplicates.
        return;
    }

    range.ParentSeqRanges.Add(parentRange);
}
/// <summary>
/// Makes the metadata of <paramref name="toRange"/> mirror that of
/// <paramref name="fromRange"/>: the target is emptied, then every key of the source is
/// assigned with the source's value object (no copy of the value is made).
/// </summary>
/// <param name="toRange">Range to which the metadata has to be copied.</param>
/// <param name="fromRange">Range from which the metadata has to be copied.</param>
private static void CopyOfMetadata(ISequenceRange toRange, ISequenceRange fromRange)
{
    toRange.Metadata.Clear();

    bool sourceHasEntries = fromRange.Metadata.Count > 0;
    if (!sourceHasEntries)
    {
        return;
    }

    foreach (string entryKey in fromRange.Metadata.Keys)
    {
        object entryValue = fromRange.Metadata[entryKey];
        toRange.Metadata[entryKey] = entryValue;
    }
}
/// <summary>
/// Creates a cluster whose GenomeLocation is a new SequenceRange carrying the given
/// ID and start position.
/// </summary>
/// <param name="refGenome">Value stored as the ID of the genome location.</param>
/// <param name="refStart">Value stored as the Start of the genome location.</param>
public Cluster(string refGenome, int refStart)
{
    GenomeLocation = new SequenceRange
    {
        ID = refGenome,
        Start = refStart
    };
}