/// <summary>
/// Creates a GroupData instance that bundles a sequence range grouping with
/// its name and the metadata recorded for its ranges.
/// </summary>
/// <param name="group">The SequenceRangeGrouping stored in the Group property.</param>
/// <param name="name">Name stored in the Name property.</param>
/// <param name="metadata">Metadata for the grouping: a string-keyed dictionary whose values
/// map each ISequenceRange to a string.</param>
public GroupData(
    SequenceRangeGrouping group,
    string name,
    Dictionary<string, Dictionary<ISequenceRange, string>> metadata)
{
    this.Group = group;
    this.Name = name;
    this._metadata = metadata;
}
/// <summary>
/// Writes out a grouping of ISequenceRange objects to the file at the
/// specified path. The file is created with File.Create and the formatter's
/// stream-based Format overload does the actual writing.
/// </summary>
/// <param name="formatter">Formatter used to write the ranges.</param>
/// <param name="rangeGroup">The range grouping to write.</param>
/// <param name="filename">Path of the file to create.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when formatter is null, or when filename is null or empty.
/// </exception>
public static void Format(this ISequenceRangeFormatter formatter, SequenceRangeGrouping rangeGroup, string filename)
{
    // Validate everything up front: File.Create below creates or overwrites the
    // target file, so a bad call must fail before the file system is touched.
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (string.IsNullOrEmpty(filename))
    {
        throw new ArgumentNullException("filename");
    }

    using (FileStream fs = File.Create(filename))
    {
        formatter.Format(fs, rangeGroup);
    }
}
/// <summary>
/// Merges the sequence ranges of the query grouping with the sequence ranges
/// of this grouping.
///
/// For example,
///
///  Ranges in this instance   Ranges in the query
///  3 to 15                   4 to 10
///  5 to 18                   11 to 20
///
///  Result for minOverlap set to 1
///  3 to 20
///
/// The ranges of both groupings are flattened into one combined grouping, and
/// the parameterless-query MergeOverlaps overload is then run on that combined
/// grouping, so the result is a newly created SequenceRangeGrouping.
/// </summary>
/// <param name="query">Query sequence ranges.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap.</param>
/// <param name="isParentSeqRangesRequired">If true, the sequence ranges from which a
/// new sequence range is created are added to the ParentSeqRanges property of that
/// new sequence range.</param>
/// <returns>The merged sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
    }

    // Pool this grouping's ranges and the query's ranges into a single list.
    List<ISequenceRange> combined = new List<ISequenceRange>();
    combined.AddRange(this.Flatten());
    combined.AddRange(query.Flatten());

    // Merging is delegated to the single-grouping overload.
    return new SequenceRangeGrouping(combined).MergeOverlaps(minOverlap, isParentSeqRangesRequired);
}
/// <summary>
/// Recomputes the groups, ranges and bases counters from the given grouping:
/// the number of group IDs, the total number of ranges across all groups, and
/// the sum of SequenceRange.Length over every range.
/// </summary>
/// <param name="srg">Grouping to measure.</param>
/// <exception cref="System.ArgumentNullException">Thrown when srg is null.</exception>
public void ComputeSequenceRangeGroupingMetrics(SequenceRangeGrouping srg)
{
    if (srg == null)
    {
        throw new System.ArgumentNullException("srg");
    }

    // Accumulate in locals so the counters are only replaced once the whole
    // grouping has been walked successfully.
    long groupCount = 0L;
    long rangeCount = 0L;
    long baseCount = 0L;

    foreach (string id in srg.GroupIDs)
    {
        // Look the group up once and reuse it for both the count and the sum.
        var group = srg.GetGroup(id);

        ++groupCount;
        rangeCount += group.Count;

        // The explicit SequenceRange iteration type is kept from the original:
        // Length is read from SequenceRange.
        foreach (SequenceRange sr in group)
        {
            baseCount += sr.Length;
        }
    }

    groups = groupCount;
    ranges = rangeCount;
    bases = baseCount;
}
/// <summary>
/// For each group in the grouping, this method traverses through each range
/// in the group and normalizes the ranges down to the minimal spanning set
/// required to still show the same range spans.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  -> 10 to 100
///  -> 200 to 250
///  -> 35 to 45
///  -> 90 to 150
///
/// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
/// For minOverlap = 0
///
///  -> 10 to 150
///  -> 200 to 250
///
/// for minOverlap = -50
///
///  -> 10 to 250
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="minOverlap">Minimum number of base pairs that must overlap. A following
/// range is merged into the current one when (current End - following Start) is at least
/// this value.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The overlapped sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

    foreach (List<ISequenceRange> rangeList in this.groups.Values)
    {
        // Work on a sorted copy so the group's own list is left untouched.
        List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
        sortedRanges.Sort();

        // Walk the sorted list with a cursor instead of repeatedly removing the
        // head element, which would shift the whole list on every removal.
        int index = 0;
        while (index < sortedRanges.Count)
        {
            ISequenceRange first = sortedRanges[index];
            index++;

            ISequenceRange seqRange = new SequenceRange(first.ID, first.Start, first.End);
            if (isParentSeqRangesRequired)
            {
                AddParent(seqRange, first);
            }

            seqRangeGroup.Add(seqRange);

            // Absorb every following range that overlaps the merged range by at
            // least minOverlap, extending the merged range's End as needed.
            while (index < sortedRanges.Count
                && (seqRange.End - sortedRanges[index].Start) >= minOverlap)
            {
                ISequenceRange next = sortedRanges[index];
                index++;

                seqRange.End = Math.Max(seqRange.End, next.End);
                if (isParentSeqRangesRequired)
                {
                    AddParent(seqRange, next);
                }
            }
        }
    }

    return seqRangeGroup;
}
CreateNodeXLVennDiagramWorkbookFromSequenceRangeGroupings
(
    Microsoft.Office.Interop.Excel.Application oApplication,
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB
)
{
    // Builds a two-input Venn diagram workbook: splits the inputs into the
    // "only A", "only B" and "A and B" groupings, sizes the diagram by the
    // base count of each, then adds sheets with the source and overlap data.
    SequenceRangeGrouping onlyA;
    SequenceRangeGrouping onlyB;
    SequenceRangeGrouping onlyAB;
    CreateSequenceRangeGroupingsForVennDiagram(srgA, srgB, out onlyA, out onlyB, out onlyAB);

    // Metrics are computed for all three groupings before the diagram data is built.
    SequenceRangeGroupingMetrics metricsOnlyA = new SequenceRangeGroupingMetrics(onlyA);
    SequenceRangeGroupingMetrics metricsOnlyB = new SequenceRangeGroupingMetrics(onlyB);
    SequenceRangeGroupingMetrics metricsOnlyAB = new SequenceRangeGroupingMetrics(onlyAB);

    VennDiagramData vdd = new VennDiagramData(
        metricsOnlyA.bases,
        metricsOnlyB.bases,
        metricsOnlyAB.bases);

    Workbook oWorkbook = CreateNodeXLVennDiagramWorkbook(oApplication, vdd);

    // One sheet per input with its source ranges.
    DisplaySourceData(srgA, Resources.A, oWorkbook);
    DisplaySourceData(srgB, Resources.B, oWorkbook);

    // A final sheet, appended after the last existing one, holds the overlap data.
    Worksheet outputSheet = oWorkbook.Sheets.Add(Type.Missing, oWorkbook.Sheets[oWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
    outputSheet.Name = Resources.OverlapsSheetName;
    WriteOverlapData(outputSheet, onlyA, onlyB, onlyAB);

    oApplication.Visible = true;

    return oWorkbook;
}
CreateNodeXLVennDiagramWorkbookFromSequenceRangeGroupings
(
    Microsoft.Office.Interop.Excel.Application oApplication,
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB,
    SequenceRangeGrouping srgC
)
{
    // Builds a three-input Venn diagram workbook: splits the inputs into the
    // seven Venn regions, sizes the diagram by the base count of each region,
    // then adds sheets with the source and overlap data.
    SequenceRangeGrouping onlyA;
    SequenceRangeGrouping onlyB;
    SequenceRangeGrouping onlyC;
    SequenceRangeGrouping onlyAB;
    SequenceRangeGrouping onlyAC;
    SequenceRangeGrouping onlyBC;
    SequenceRangeGrouping onlyABC;
    CreateSequenceRangeGroupingsForVennDiagram(
        srgA, srgB, srgC,
        out onlyA, out onlyB, out onlyC,
        out onlyAB, out onlyAC, out onlyBC,
        out onlyABC);

    // Generate the metrics for every Venn region.
    SequenceRangeGroupingMetrics metricsOnlyA = new SequenceRangeGroupingMetrics(onlyA);
    SequenceRangeGroupingMetrics metricsOnlyB = new SequenceRangeGroupingMetrics(onlyB);
    SequenceRangeGroupingMetrics metricsOnlyC = new SequenceRangeGroupingMetrics(onlyC);
    SequenceRangeGroupingMetrics metricsOnlyAB = new SequenceRangeGroupingMetrics(onlyAB);
    SequenceRangeGroupingMetrics metricsOnlyAC = new SequenceRangeGroupingMetrics(onlyAC);
    SequenceRangeGroupingMetrics metricsOnlyBC = new SequenceRangeGroupingMetrics(onlyBC);
    SequenceRangeGroupingMetrics metricsOnlyABC = new SequenceRangeGroupingMetrics(onlyABC);

    // Data for the NodeXL Venn diagram: one base count per region.
    VennDiagramData vdd = new VennDiagramData(
        metricsOnlyA.bases,
        metricsOnlyB.bases,
        metricsOnlyC.bases,
        metricsOnlyAB.bases,
        metricsOnlyAC.bases,
        metricsOnlyBC.bases,
        metricsOnlyABC.bases);

    // To ensure NodeXL displays the diagram, do NOT make the application
    // visible or update the screen until the parameters are all set up.
    oApplication.ScreenUpdating = false;
    Workbook oWorkbook = CreateNodeXLVennDiagramWorkbook(oApplication, vdd);

    // One sheet per input with its source ranges.
    DisplaySourceData(srgA, Resources.A, oWorkbook);
    DisplaySourceData(srgB, Resources.B, oWorkbook);
    DisplaySourceData(srgC, Resources.C, oWorkbook);

    // A final sheet, appended after the last existing one, holds the overlap data.
    Worksheet outputSheet = oWorkbook.Sheets.Add(Type.Missing, oWorkbook.Sheets[oWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
    outputSheet.Name = Resources.OverlapsSheetName;
    WriteOverlapData(outputSheet, onlyA, onlyB, onlyC, onlyAB, onlyAC, onlyBC, onlyABC);

    oApplication.ScreenUpdating = true;
    oApplication.Visible = true;

    return oWorkbook;
}
CreateSequenceRangeGroupingsForVennDiagram
(
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB,
    out SequenceRangeGrouping srgOnly_A,
    out SequenceRangeGrouping srgOnly_B,
    out SequenceRangeGrouping srgOnly_AB
)
{
    // Produces the three groupings of a two-circle Venn diagram.
    // The merged grouping of A and B plays the role of the set union (boolean Or).
    SequenceRangeGrouping unionOfAandB = srgA.MergeOverlaps(srgB);

    // Each "only" grouping is the union with the other input subtracted from it.
    // TODO: Subtract and Intersect should use same 'logic' (for bool 3rd arg)
    srgOnly_A = unionOfAandB.Subtract(
        srgB,
        1,
        SubtractOutputType.NonOverlappingPiecesOfIntervals);
    srgOnly_B = unionOfAandB.Subtract(
        srgA,
        1,
        SubtractOutputType.NonOverlappingPiecesOfIntervals);

    // The shared grouping is the intersection of the two original inputs.
    srgOnly_AB = srgA.Intersect(
        srgB,
        1,
        IntersectOutputType.OverlappingPiecesOfIntervals);
}
CreateSequenceRangeGroupingsForVennDiagram
(
    SequenceRangeGrouping srgA,
    SequenceRangeGrouping srgB,
    SequenceRangeGrouping srgC,
    out SequenceRangeGrouping srgOnly_A,
    out SequenceRangeGrouping srgOnly_B,
    out SequenceRangeGrouping srgOnly_C,
    out SequenceRangeGrouping srgOnly_AB,
    out SequenceRangeGrouping srgOnly_AC,
    out SequenceRangeGrouping srgOnly_BC,
    out SequenceRangeGrouping srgOnly_ABC
)
{
    // Produces the seven groupings of a three-circle Venn diagram.

    // Single-input groupings: each input minus the merged grouping of the other two.
    SequenceRangeGrouping unionBC = srgB.MergeOverlaps(srgC);
    srgOnly_A = srgA.Subtract(unionBC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    SequenceRangeGrouping unionAC = srgA.MergeOverlaps(srgC);
    srgOnly_B = srgB.Subtract(unionAC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    SequenceRangeGrouping unionAB = srgA.MergeOverlaps(srgB);
    srgOnly_C = srgC.Subtract(unionAB, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    // Pairwise groupings: the intersection of two inputs minus the third input.
    SequenceRangeGrouping commonAB = srgA.Intersect(srgB, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
    srgOnly_AB = commonAB.Subtract(srgC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    SequenceRangeGrouping commonAC = srgA.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
    srgOnly_AC = commonAC.Subtract(srgB, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    SequenceRangeGrouping commonBC = srgB.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
    srgOnly_BC = commonBC.Subtract(srgA, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

    // Triple grouping: the A-and-B intersection intersected with C.
    srgOnly_ABC = commonAB.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
}
/// <summary>
/// For each group in the grouping, this method traverses through each range
/// in the group and normalizes the ranges down to the minimal spanning set
/// required to still show the same range spans.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  -> 10 to 100
///  -> 200 to 250
///  -> 35 to 45
///  -> 90 to 150
///
/// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
/// For minOverlap = 0
///
///  -> 10 to 150
///  -> 200 to 250
///
/// for minOverlap = -50
///
///  -> 10 to 250
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="minOverlap">Minimum number of base pairs that must overlap. A following
/// range is merged into the current one when (current End - following Start) is at least
/// this value.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The overlapped sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

    foreach (List<ISequenceRange> rangeList in this.groups.Values)
    {
        // Sort a private copy; the group's own list keeps its order.
        List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
        sortedRanges.Sort();

        // A read cursor replaces the repeated RemoveAt(0) calls, each of which
        // had to shift every remaining element of the list.
        for (int index = 0; index < sortedRanges.Count;)
        {
            ISequenceRange head = sortedRanges[index++];

            ISequenceRange seqRange = new SequenceRange(head.ID, head.Start, head.End);
            if (isParentSeqRangesRequired)
            {
                AddParent(seqRange, head);
            }

            seqRangeGroup.Add(seqRange);

            // Fold in each subsequent range that overlaps the merged range by
            // at least minOverlap; the merged End only ever grows.
            while (index < sortedRanges.Count
                && (seqRange.End - sortedRanges[index].Start) >= minOverlap)
            {
                ISequenceRange absorbed = sortedRanges[index++];

                seqRange.End = Math.Max(seqRange.End, absorbed.End);
                if (isParentSeqRangesRequired)
                {
                    AddParent(seqRange, absorbed);
                }
            }
        }
    }

    return seqRangeGroup;
}
/// <summary>
/// Method called when the user clicks Ok button on InputSelectionDialog.
/// Takes care of parsing the selections and returning the result to the user.
/// In case there was an error parsing, it will show the input selection dialog again with the sequence highlighted.
///
/// For every sequence item of the dialog the method first asks SequenceCache for an
/// already parsed GroupData; otherwise it parses the Excel ranges of the item into
/// ISequenceRange objects, groups them into a SequenceRangeGrouping and adds the new
/// GroupData to the cache. When all items are processed, the
/// inputSequenceRangeSelectionComplete callback (if set) is invoked and the dialog is closed.
/// </summary>
/// <param name="dialog">InputSequenceDialog object which raised this event</param>
private void OnInputSequenceRangeDialogSubmit(ISelectionDialog dialog)
{
    // NOTE(review): the result of this 'as' cast is used without a null check below;
    // presumably the event is only ever raised by an InputSelectionDialog - confirm.
    InputSelectionDialog selectionDialog = dialog as InputSelectionDialog;
    GroupData cachedData = null;

    // maps sheet to its column-mapping
    Dictionary<string, Dictionary<int, string>> columnMappedSheets = new Dictionary<string, Dictionary<int, string>>();

    // Goes in the cache and is the output of this method as well.
    Dictionary<SequenceRangeGrouping, GroupData> groupsData = new Dictionary<SequenceRangeGrouping, GroupData>();
    List<SequenceRangeGrouping> parsedSequences = new List<SequenceRangeGrouping>();

    SequenceRangeGrouping sequenceRangeGroup = null;
    Dictionary<string, Dictionary<ISequenceRange, string>> sheetData = null;
    Dictionary<ISequenceRange, string> rangeData = null;
    List<ISequenceRange> sequenceRanges = null;

    List<Range> rangesInCurrentSequenceItem;

    // Regular expression to read the sheet name from address
    // (captures everything from the start of the string up to the first '!').
    Regex regexSheetname = new Regex(@"(?<Sheetname>^.[^!]*)", RegexOptions.IgnoreCase);
    Match matchSheetname = null;
    string sheetName = string.Empty;

    try
    {
        foreach (InputSequenceItem currentSequenceItem in selectionDialog.GetSequences())
        {
            try
            {
                rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);

                // get from cache
                cachedData = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as GroupData;
                if (cachedData != null)
                {
                    // got from cache
                    cachedData.Name = currentSequenceItem.SequenceName; // Set ID

                    // Items with "use metadata" selected go to the front of the list,
                    // all others are appended.
                    if (currentSequenceItem.IsUseMetadataSelected)
                    {
                        parsedSequences.Insert(0, cachedData.Group);
                    }
                    else
                    {
                        parsedSequences.Add(cachedData.Group);
                    }

                    // The same cached group may be selected by more than one item;
                    // register it only once.
                    if (!groupsData.ContainsKey(cachedData.Group))
                    {
                        groupsData.Add(cachedData.Group, cachedData);
                    }
                }
                else
                {
                    // parse it as its not in cache
                    sheetData = new Dictionary<string, Dictionary<ISequenceRange, string>>();
                    sequenceRanges = new List<ISequenceRange>();

                    foreach (Range currentRange in rangesInCurrentSequenceItem)
                    {
                        // NOTE(review): this flag is only assigned when a new mapping is
                        // created below; for a range whose mapping key is already known it
                        // stays false, so its first row is parsed as data - confirm intended.
                        bool firstRowIsHeader = false;

                        // See if the sheet in which this range is, has a column mapping
                        if (!columnMappedSheets.ContainsKey(GetMappingKey(currentRange)))
                        {
                            (currentRange.Worksheet as _Worksheet).Activate();
                            currentRange.Select();
                            Dictionary<int, string> mapping = GetMappingForRange(currentRange, out firstRowIsHeader);
                            if (mapping == null)
                            {
                                // Could not get a proper mapping. So redirect to previous window.
                                selectionDialog.ShowDialog();
                                return;
                            }

                            if (firstRowIsHeader)
                            {
                                UpdateColumnHeaders(currentRange, mapping);
                            }

                            columnMappedSheets.Add(GetMappingKey(currentRange), mapping);
                        }

                        // If range has a header, remove first row from it before sending it for parsing.
                        Range rangeToParse;
                        if (firstRowIsHeader)
                        {
                            if (currentRange.Rows.Count == 1) // only one row which is marked as header, throw error
                            {
                                throw new InvalidOperationException(Resources.SelectionModel_ParsingFailed);
                            }

                            // Shift down one row, then shrink by one row so the range
                            // covers the same columns without the header row.
                            rangeToParse = currentRange.get_Offset(1, 0);
                            rangeToParse = rangeToParse.get_Resize(currentRange.Rows.Count - 1, currentRange.Columns.Count);
                        }
                        else
                        {
                            rangeToParse = currentRange;
                        }

                        Dictionary<ISequenceRange, string> srCollection =
                            ExcelSelectionParser.RangeToSequenceRange(
                                rangeToParse,
                                columnMappedSheets[GetMappingKey(currentRange)]);

                        // Bucket each parsed range under the sheet name extracted from its
                        // associated string value; entries whose value does not match the
                        // sheet-name pattern are skipped.
                        foreach (KeyValuePair<ISequenceRange, string> sr in srCollection)
                        {
                            matchSheetname = regexSheetname.Match(sr.Value);
                            if (matchSheetname.Success)
                            {
                                sheetName = matchSheetname.Groups["Sheetname"].Value;
                                if (sheetData.TryGetValue(sheetName, out rangeData))
                                {
                                    rangeData.Add(sr.Key, sr.Value);
                                }
                                else
                                {
                                    rangeData = new Dictionary<ISequenceRange, string>();
                                    sheetData.Add(sheetName, rangeData);
                                    rangeData.Add(sr.Key, sr.Value);
                                }

                                sequenceRanges.Add(sr.Key);
                            }
                        }
                    }

                    sequenceRangeGroup = new SequenceRangeGrouping(sequenceRanges);
                    cachedData = new GroupData(sequenceRangeGroup, currentSequenceItem.SequenceName, sheetData);
                    SequenceCache.Add(rangesInCurrentSequenceItem, cachedData, selectionDialog.InputParamsAsKey);

                    // Same ordering rule as for cached items.
                    if (currentSequenceItem.IsUseMetadataSelected)
                    {
                        parsedSequences.Insert(0, cachedData.Group);
                    }
                    else
                    {
                        parsedSequences.Add(cachedData.Group);
                    }

                    groupsData.Add(cachedData.Group, cachedData);
                }
            }
            catch
            {
                // Set error status on the current item and re-throw the error
                currentSequenceItem.SetErrorStatus(true);
                throw;
            }
        }

        Dictionary<string, object> parameters = new Dictionary<string, object>();
        parameters.Add(InputSelection.OVERLAP, selectionDialog.OverlappingBasePairs);
        parameters.Add(InputSelection.MINIMUMOVERLAP, selectionDialog.MinOverLap);

        // On successful completion of parsing...
        if (inputSequenceRangeSelectionComplete != null)
        {
            InputSequenceRangeSelectionEventArg eventArg =
                new InputSequenceRangeSelectionEventArg(groupsData, parsedSequences, parameters, argsForCallback);
            inputSequenceRangeSelectionComplete(eventArg);
        }

        // NOTE(review): the handler removed here is OnInputSequenceDialogSubmit, not this
        // method (OnInputSequenceRangeDialogSubmit) - verify which handler was subscribed.
        selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
        selectionDialog.Close();
    }
    catch (Exception ex)
    {
        // Any failure is reported to the user and the selection dialog is shown again.
        MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
        selectionDialog.ShowDialog();
    }
}
/// <summary>
/// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  Ranges in this instance   Ranges in the query
///  0 to 10                   20 to 40
///  30 to 50                  70 to 100
///  60 to 80                  400 to 800
///  300 to 500                850 to 900
///  600 to 700                900 to 1200
///  800 to 1000
///
///  Result for minOverlap set to 1
///   1. If outputType is OverlappingPiecesOfIntervals.
///         30 to 40
///         70 to 80
///         400 to 500
///         600 to 700
///         850 to 900
///         900 to 1000
///   2. If outputType is OverlappingIntervals
///         30 to 50
///         60 to 80
///         300 to 500
///         600 to 700
///         800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals that is only the base pairs that overlaps with
/// query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
/// <exception cref="ArgumentNullException">Thrown when query is null.</exception>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
    SequenceRange range = null;

    // merge the query sequence ranges.
    // The original (unmerged) query ranges are kept only when parents are required,
    // so a merged query range can later be told apart from an original one.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        // Groups with no query ranges contribute nothing to the result.
        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // If the minOverlap is less than or equal to zero and overlapping intervals are required,
                    // then add the ref seq to result.
                    if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }

                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                    {
                        // Add ref sequence only once for query ranges having same start and end.
                        // NOTE(review): the && below skips a query range when EITHER its Start or
                        // its End equals the previous one's; "same start and end" would be the
                        // negation written with || - confirm which is intended.
                        if (previousOverlappingRange == null || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // The output piece is the part shared by the ref and query ranges.
                            range = new SequenceRange(
                                refRange.ID,
                                Math.Max(queryRange.Start, refRange.Start),
                                Math.Min(queryRange.End, refRange.End));

                            result.Add(range);
                            CopyOfMetadata(range, refRange);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            // An original query range is its own parent; a merged one
                            // passes on the parents it collected during merging.
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }
                    else
                    {
                        // Add ref sequence only once.
                        if (previousOverlappingRange == null)
                        {
                            range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                            else
                            {
                                // Without parent tracking the remaining query ranges
                                // add nothing further, so stop after the first one.
                                break;
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Method to validate parent seq ranges in result.
/// When parents are not required, every result range must have an empty
/// ParentSeqRanges list. When they are required, the number of reference ranges
/// plus the number of query ranges (both taken from the group with the same ID)
/// found in a result range's ParentSeqRanges must equal the size of that list.
/// </summary>
/// <param name="resultSeqRange">Result seq range group.</param>
/// <param name="refSeqRange">Reference seq range group; may be null.</param>
/// <param name="querySeqRange">Query seq range group; may be null.</param>
/// <param name="isParentSeqRangeRequired">Flag to indicate whether result should contain parent seq ranges or not.</param>
/// <returns>Returns true if the parent seq ranges are valid; otherwise returns false.</returns>
private static bool ValidateParentSeqRange(SequenceRangeGrouping resultSeqRange, SequenceRangeGrouping refSeqRange, SequenceRangeGrouping querySeqRange, bool isParentSeqRangeRequired)
{
    foreach (string groupid in resultSeqRange.GroupIDs)
    {
        // A grouping that is null, or that has no group with this ID, contributes
        // no candidate parents. Treat both as an empty list rather than handing a
        // null sequence to LINQ, which would throw.
        IList<ISequenceRange> refSeqRangeList = refSeqRange != null ? refSeqRange.GetGroup(groupid) : null;
        if (refSeqRangeList == null)
        {
            refSeqRangeList = new List<ISequenceRange>();
        }

        IList<ISequenceRange> querySeqRangeList = querySeqRange != null ? querySeqRange.GetGroup(groupid) : null;
        if (querySeqRangeList == null)
        {
            querySeqRangeList = new List<ISequenceRange>();
        }

        foreach (ISequenceRange resultRange in resultSeqRange.GetGroup(groupid))
        {
            if (!isParentSeqRangeRequired)
            {
                if (resultRange.ParentSeqRanges.Count != 0)
                {
                    return false;
                }

                continue;
            }

            int refCount = refSeqRangeList.Count(r => resultRange.ParentSeqRanges.Contains(r));
            int queryCount = querySeqRangeList.Count(r => resultRange.ParentSeqRanges.Contains(r));

            if (refCount + queryCount != resultRange.ParentSeqRanges.Count)
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Parses the given alignment file, prints the regions of its orphan reads and
/// then the regions obtained by merging the overlapping orphan regions
/// (reported as chromosomal hot spots).
/// </summary>
/// <param name="filename">Path of the alignment file; parsed with SAMParser when
/// SAMInput is set, otherwise with BAMParser.</param>
private void DisplayOrphans(string filename)
{
    SequenceAlignmentMap alignmentMapobj = null;

    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // Get reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

    // Get the orphan reads. The query is materialized once so that counting
    // and projecting below do not each re-run the filter.
    List<PairedRead> orphans = pairedReads
        .Where(pr => pr.PairedType == PairedReadType.Orphan)
        .ToList();

    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    var orphanRegions = new List<ISequenceRange>(orphans.Count);
    orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

    // Get sequence range grouping object.
    SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

    if (!rangeGroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("Region of Orphan reads:");
        DisplaySequenceRange(rangeGroup);
    }

    // Merging the overlapping orphan regions yields the hot spots.
    SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

    if (!mergedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo hot spots to display");
    }
    else
    {
        Console.Write("\r\nChromosomal hot spot:");
        DisplaySequenceRange(mergedRegions);
    }
}
//
// Read a Bed file into memory.
//
// Parses the file with BedParser and wraps the parsed ranges in a
// SequenceRangeGrouping. In verbose mode the file name and the parsed ranges
// are written to standard error. When arguments.normalizeInputs is set, the
// grouping returned is the result of MergeOverlaps().
//
public static SequenceRangeGrouping ReadBedFile(string filename)
{
    var parser = new BedParser();
    IList<ISequenceRange> listSequenceRange = parser.ParseRange(filename);
    if (verbose)
    {
        Console.Error.WriteLine("Processed File: {0}", filename);
        ListSequenceRangeToString(listSequenceRange);
    }

    var srg = new SequenceRangeGrouping(listSequenceRange);
    if (arguments.normalizeInputs)
    {
        // MergeOverlaps returns a NEW grouping and leaves the receiver untouched,
        // so the result has to be kept; previously it was discarded and the
        // normalizeInputs option had no effect.
        srg = srg.MergeOverlaps(); // could be called Normalize() or Canonicalize()
    }

    return srg;
}
/// <summary>
/// Writes the ranges of a sequence range grouping to the console as a table
/// with a header line followed by one line per range (ID, start, end),
/// going through the groups in the order given by GroupIDs.
/// </summary>
/// <param name="seqRangeGrop">Sequence range grouping to display.</param>
private static void DisplaySequenceRange(SequenceRangeGrouping seqRangeGrop)
{
    var groupIds = seqRangeGrop.GroupIDs;

    // Table header.
    Console.Write("\r\nChromosome\t\tStart\tEnd");

    foreach (string groupId in groupIds)
    {
        // One output line per range of the current group.
        foreach (ISequenceRange current in seqRangeGrop.GetGroup(groupId))
        {
            string idText = current.ID.ToString();
            string startText = current.Start.ToString();
            string endText = current.End.ToString();

            Console.Write("\n{0}\t\t\t{1}\t{2}", idText, startText, endText);
        }
    }
}
/// <summary>
/// Identify hot spot chromosomes for length anomaly regions.
/// Parses the given alignment file, prints the regions of the length-anomaly
/// reads and of the orphan reads, and then prints the intersection of the two
/// as hot spots.
/// </summary>
/// <param name="filename">Path of the alignment file; parsed with SAMParser when
/// SAMInput is set, otherwise with BAMParser.</param>
/// <param name="mean">Mean value passed to GetPairedReads. When this or
/// <paramref name="deviation"/> is -1 (the default), the parameterless GetPairedReads
/// overload is used instead.</param>
/// <param name="deviation">Standard deviation passed to GetPairedReads; see
/// <paramref name="mean"/>.</param>
private void IdentifyLentghAnamolies(string filename, float mean = -1, float deviation = -1)
{
    // -1 is the "not supplied" sentinel for either statistic.
    bool calculateMeanNdeviation = mean == -1 || deviation == -1;

    SequenceAlignmentMap alignmentMapobj = null;

    if (!SAMInput)
    {
        BAMParser bamParser = new BAMParser();
        alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
    }
    else
    {
        SAMParser samParser = new SAMParser();
        alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
    }

    // Get reads from the sequence alignment map object.
    IList<PairedRead> pairedReads = null;

    if (calculateMeanNdeviation)
    {
        pairedReads = alignmentMapobj.GetPairedReads();
    }
    else
    {
        pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
    }

    // Get the orphan reads. Materialized once so the filter is not re-run for
    // the emptiness check, the capacity hint and the loop.
    List<PairedRead> orphans = pairedReads
        .Where(pr => pr.PairedType == PairedReadType.Orphan)
        .ToList();

    if (orphans.Count == 0)
    {
        Console.WriteLine("No Orphans to display");
    }

    List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
    foreach (PairedRead orphanRead in orphans)
    {
        orphanRegions.Add(GetRegion(orphanRead.Read1));
    }

    // Get sequence range grouping for Orphan regions.
    SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

    // Get the length-anomaly reads (materialized once, as above).
    List<PairedRead> lengthAnomalies = pairedReads
        .Where(pe => pe.PairedType == PairedReadType.LengthAnomaly)
        .ToList();

    if (lengthAnomalies.Count == 0)
    {
        Console.WriteLine("No Anomalies to display");
    }

    List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
    foreach (PairedRead laRead in lengthAnomalies)
    {
        // The region starts at the position of the first read and extends by
        // the insert length of the pair.
        SequenceRange range = new SequenceRange();
        range.ID = laRead.Read1.RName;
        range.Start = laRead.Read1.Pos;
        range.End = laRead.Read1.Pos + laRead.InsertLength;
        lengthAnomalyRegions.Add(range);
    }

    // Get sequence range grouping for length anomaly regions.
    SequenceRangeGrouping lengthAnomalyRangegroup = new SequenceRangeGrouping(lengthAnomalyRegions);

    if (!lengthAnomalyRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Length anomalies reads to display");
    }
    else
    {
        Console.Write("Region of length anomaly:");
        DisplaySequenceRange(lengthAnomalyRangegroup);
    }

    if (!orphanRangegroup.GroupIDs.Any())
    {
        Console.Write("\r\nNo Orphan reads to display");
    }
    else
    {
        Console.Write("\r\nRegion of Orphan reads:");
        DisplaySequenceRange(orphanRangegroup);
    }

    // Hot spots are the length-anomaly regions that intersect orphan regions.
    SequenceRangeGrouping intersectedRegions = lengthAnomalyRangegroup.Intersect(orphanRangegroup);
    if (!intersectedRegions.GroupIDs.Any())
    {
        Console.Write("\r\nNo Hot spots found");
    }
    else
    {
        Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
        DisplaySequenceRange(intersectedRegions);
    }
}
// Creates the metrics object and immediately computes its counters for the
// given grouping by delegating to ComputeSequenceRangeGroupingMetrics.
public SequenceRangeGroupingMetrics(SequenceRangeGrouping srg)
{
    this.ComputeSequenceRangeGroupingMetrics(srg);
}
/// <summary>
/// This method will dump the region overlap data to a sheet in the NodeXL workbook.
///
/// Layout written to the sheet: row 1 holds the column captions (D..G), rows 2-4 hold
/// summary formulas (region count, base pair count, percentage of base pairs), row 6
/// holds the data headers and the data rows start at row 7. Each data row carries the
/// ID, start and end of one range in columns A..C and its length in column D, E or F
/// depending on which of the three groupings the range came from.
/// </summary>
/// <param name="outputSheet">Sheet to which the data should be dumped to</param>
/// <param name="srgOnly_A">Grouping whose range lengths are written to column D (captioned Resources.OnlyA)</param>
/// <param name="srgOnly_B">Grouping whose range lengths are written to column E (captioned Resources.OnlyB)</param>
/// <param name="srgOnly_AB">Grouping whose range lengths are written to column F (captioned Resources.OnlyAB)</param>
/// <returns>Worksheet after writing data</returns>
private static Worksheet WriteOverlapData(Worksheet outputSheet, SequenceRangeGrouping srgOnly_A, SequenceRangeGrouping srgOnly_B, SequenceRangeGrouping srgOnly_AB)
{
    // Screen updating and events are switched off while the sheet is filled.
    // NOTE(review): the finally block sets both back to true unconditionally rather
    // than restoring the values seen on entry - confirm no caller relies on them
    // staying false.
    outputSheet.Application.ScreenUpdating = false;
    outputSheet.Application.EnableEvents = false;

    List<ISequenceRange> seqs_AList = null;
    List<ISequenceRange> seqs_BList = null;
    List<ISequenceRange> seqs_ABList = null;
    int dataStartRow = 7; // first sheet row that receives range data
    int currentRow;

    try
    {
        // write the header
        outputSheet.Range["A2"].Value2 = Resources.RegionsCountLabel;
        outputSheet.Range["A3"].Value2 = Resources.BasePairsCountLabel;
        outputSheet.Range["A4"].Value2 = Resources.PerOfBasePairsInRegionLabel;
        outputSheet.Range["D1"].Value2 = Resources.OnlyA;
        outputSheet.Range["E1"].Value2 = Resources.OnlyB;
        outputSheet.Range["F1"].Value2 = Resources.OnlyAB;
        outputSheet.Range["G1"].Value2 = Resources.Total;
        outputSheet.Range["A6", "F6"].Value2 = BedHeadersForTwoInput;
        outputSheet.Range["A6", "F6"].Cells.Borders[XlBordersIndex.xlEdgeBottom].Weight = XlBorderWeight.xlThin;
        outputSheet.Range["A1", "F1"].Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
        outputSheet.Range["A6", "F6"].Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
        currentRow = dataStartRow;

        // Get all groupID's in all ranges
        List<string> groupIDs = new List<string>();
        groupIDs.AddRange(srgOnly_A.GroupIDs);
        groupIDs.AddRange(srgOnly_B.GroupIDs);
        groupIDs.AddRange(srgOnly_AB.GroupIDs);
        int prevRow = dataStartRow; // sheet row at which the current group's block starts

        // Display all overlap data
        foreach (string groupID in groupIDs.Distinct())
        {
            seqs_AList = srgOnly_A.GetGroup(groupID);
            seqs_BList = srgOnly_B.GetGroup(groupID);
            seqs_ABList = srgOnly_AB.GetGroup(groupID);

            // A grouping that lacks this group ID yields null; use an empty list instead.
            if (seqs_AList == null) seqs_AList = new List<ISequenceRange>();
            if (seqs_BList == null) seqs_BList = new List<ISequenceRange>();
            if (seqs_ABList == null) seqs_ABList = new List<ISequenceRange>();

            int indexA = 0;
            int indexB = 0;
            int indexAB = 0;

            // One row per range of this group; the whole block is written to the
            // sheet with a single assignment after the loop.
            object[,] output = new object[seqs_AList.Count + seqs_BList.Count + seqs_ABList.Count, 6];

            // Three-way walk over the lists: each pass takes the range chosen by
            // GetSmallestSeqRangeToDisplay from the current heads and advances
            // only the list it came from.
            while (indexA < seqs_AList.Count || indexB < seqs_BList.Count || indexAB < seqs_ABList.Count)
            {
                ISequenceRange a = indexA < seqs_AList.Count ? seqs_AList[indexA] : null;
                ISequenceRange b = indexB < seqs_BList.Count ? seqs_BList[indexB] : null;
                ISequenceRange ab = indexAB < seqs_ABList.Count ? seqs_ABList[indexAB] : null;

                ISequenceRange range = GetSmallestSeqRangeToDisplay(a, b, ab);

                if (range == null)
                {
                    // No range could be chosen: advance all three cursors so the loop terminates.
                    indexA++;
                    indexB++;
                    indexAB++;
                    continue;
                }

                output[currentRow - prevRow, 0] = range.ID;
                output[currentRow - prevRow, 1] = range.Start;
                output[currentRow - prevRow, 2] = range.End;

                if (range == a)
                {
                    // display
                    output[currentRow - prevRow, 3] = Math.Abs(range.End - range.Start);
                    indexA++;
                }
                else if (range == b)
                {
                    // display
                    output[currentRow - prevRow, 4] = Math.Abs(range.End - range.Start);
                    indexB++;
                }
                else
                {
                    // display
                    output[currentRow - prevRow, 5] = Math.Abs(range.End - range.Start);
                    indexAB++;
                }

                currentRow++;
            }

            outputSheet.Range["A" + prevRow.ToString(), "F" + (currentRow -1).ToString()].Value2 = output;
            prevRow = currentRow;
        }

        // Row 2: count formulas for columns D, E and F over the data rows, and their total in G.
        string formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "D", dataStartRow, "D", currentRow - 1);
        outputSheet.Range["D2"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "E", dataStartRow, "E", currentRow - 1);
        outputSheet.Range["E2"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "F", dataStartRow, "F", currentRow - 1);
        outputSheet.Range["F2"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 2, "F", 2);
        outputSheet.Range["G2"].Formula = formula;

        // Row 3: sum formulas for columns D, E and F over the data rows, and their total in G.
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", dataStartRow, "D", currentRow - 1);
        outputSheet.Range["D3"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "E", dataStartRow, "E", currentRow - 1);
        outputSheet.Range["E3"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "F", dataStartRow, "F", currentRow - 1);
        outputSheet.Range["F3"].Formula = formula;
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 3, "F", 3);
        outputSheet.Range["G3"].Formula = formula;

        // Row 4: each column's share of the row-3 total, shown as a percentage.
        outputSheet.Range["D4"].Formula = "=D3/G3";
        outputSheet.Range["E4"].Formula = "=E3/G3";
        outputSheet.Range["F4"].Formula = "=F3/G3";
        formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 4, "F", 4);
        outputSheet.Range["G4"].Formula = formula;
        outputSheet.Range["D4", "G4"].Cells.NumberFormat = "0.00%";

        outputSheet.UsedRange.Columns.AutoFit();
    }
    finally
    {
        outputSheet.Application.ScreenUpdating = true;
        outputSheet.Application.EnableEvents = true;
    }

    return outputSheet;
}
/// <summary>
/// Default printing of a SequenceRangeGrouping to standard error.
/// Emits a header line with the groups / ranges / bases values taken from
/// SequenceRangeGroupingMetrics, then, for every group ID, the number of
/// ranges in that group followed by the ranges themselves (delegated to
/// ListSequenceRangeToString), and finally a blank line.
/// </summary>
/// <param name="srg">Grouping to print.</param>
/// <param name="name">Label written in square brackets at the start of the header line.</param>
public static void SequenceRangeGroupingToString(SequenceRangeGrouping srg, string name)
{
    var stderr = Console.Error;

    // Header prefix goes out first, before the metrics are computed.
    stderr.Write("[{0}] : SeqeuenceRangeGrouping: ", name);

    var metrics = new SequenceRangeGroupingMetrics(srg);
    stderr.WriteLine("{0}, {1}, {2}", metrics.groups, metrics.ranges, metrics.bases);

    foreach (string groupId in srg.GroupIDs)
    {
        // Fetch the group once and reuse it for both the count and the listing.
        var members = srg.GetGroup(groupId);
        stderr.WriteLine("--GroupID: {0}, {1}", groupId, members.Count());
        ListSequenceRangeToString(members);
    }

    stderr.WriteLine();
}
/// <summary>
/// Write source sequence ranges to the given workbook in a new sheet.
/// The sheet is appended after the last existing sheet, gets the BED header
/// row in A1:C1, and then one block of rows per group ID starting at row 2.
/// Screen updating and event firing are switched off while writing and are
/// put back to the values they had on entry, even if writing fails.
/// </summary>
/// <param name="sourceSequenceRanges">Source sequence ranges</param>
/// <param name="sheetName">Name to be given for the new sheet</param>
/// <param name="targetWorkbook">Workbook in which the sheet has to be added</param>
/// <returns>Newly created sheet with source data.</returns>
private static Worksheet DisplaySourceData(SequenceRangeGrouping sourceSequenceRanges, string sheetName, Workbook targetWorkbook)
{
    // Create the new sheet after the current last sheet.
    Worksheet outputSheet = targetWorkbook.Sheets.Add(Type.Missing, targetWorkbook.Sheets[targetWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
    outputSheet.Name = sheetName;

    // Remember the caller's settings: unconditionally forcing them back to
    // 'true' would re-enable updates/events for a caller that had deliberately
    // turned them off around a larger batch operation.
    bool previousScreenUpdating = targetWorkbook.Application.ScreenUpdating;
    bool previousEnableEvents = targetWorkbook.Application.EnableEvents;

    targetWorkbook.Application.ScreenUpdating = false;
    targetWorkbook.Application.EnableEvents = false;

    try
    {
        // Write the header.
        outputSheet.Range["A1", "C1"].Value2 = BedHeaders;

        // Write bed data, one group after another; each write reports the
        // range it filled so the next group starts right below it.
        int currentRow = 2;
        foreach (string groupID in sourceSequenceRanges.GroupIDs)
        {
            Range outputRange = WriteSequenceRangeAt(outputSheet.Cells[currentRow, 1], sourceSequenceRanges.GetGroup(groupID), false);
            currentRow += outputRange.Rows.Count;
        }

        outputSheet.UsedRange.Columns.AutoFit();
    }
    finally
    {
        targetWorkbook.Application.ScreenUpdating = previousScreenUpdating;
        targetWorkbook.Application.EnableEvents = previousEnableEvents;
    }

    return outputSheet;
}
/// <summary>
/// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
///
/// For instance if you had in group 'Chr1' the following ranges:
///
///  Ranges in this instance   Ranges in the query
///      0 to   10                 20 to   40
///     30 to   50                 70 to  100
///     60 to   80                400 to  800
///    300 to  500                850 to  900
///    600 to  700                900 to 1200
///    800 to 1000
///
/// Result for minOverlap set to 1
///  1. If outputType is OverlappingPiecesOfIntervals.
///      30 to   40
///      70 to   80
///     400 to  500
///     600 to  700
///     850 to  900
///     900 to 1000
///  2. If outputType is OverlappingIntervals
///      30 to   50
///      60 to   80
///     300 to  500
///     600 to  700
///     800 to 1000
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap.
/// By default this will be set to 1.</param>
/// <param name="outputType">
/// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
/// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs that overlap with
/// query ranges will be returned.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence ranges.</param>
/// <returns>The intersected result.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List <ISequenceRange> refSeqRanges = new List <ISequenceRange>();
    List <ISequenceRange> querySeqRanges = new List <ISequenceRange>();

    // Most recently created result range. It is deliberately kept across inner
    // loop iterations so that parents of later query ranges can be attached to it.
    SequenceRange range = null;

    // Merge the query sequence ranges. When parents are requested, the
    // un-merged query ranges are captured first so that, further down, a merged
    // query range can be told apart from one that came through unchanged.
    IList <ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        // A group with no query ranges at all contributes nothing to the result.
        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                IList <ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // If the minOverlap is less than or equal to zero and overlapping intervals are required,
                    // then add a copy of the ref range to the result.
                    if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }

                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                    {
                        // Add ref sequence only once for query ranges having same start and end.
                        // NOTE(review): the condition below uses '&&', so a new piece is created only
                        // when BOTH start and end differ from the previous query range; "same start
                        // and end" would normally be negated with '||'. Confirm which is intended.
                        if (previousOverlappingRange == null || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // The piece is the part shared by the ref range and the query range.
                            range = new SequenceRange(
                                refRange.ID,
                                Math.Max(queryRange.Start, refRange.Start),
                                Math.Min(queryRange.End, refRange.End));

                            result.Add(range);
                            CopyOfMetadata(range, refRange);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            // A query range found in the pre-merge list is recorded as a parent itself;
                            // otherwise the parents it carries are recorded instead.
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }
                    else
                    {
                        // Add ref sequence only once.
                        if (previousOverlappingRange == null)
                        {
                            range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                            else
                            {
                                // Without parent tracking the remaining query ranges add nothing.
                                break;
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            // Same parent bookkeeping as in the pieces branch above.
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
    }

    return(result);
}
/// <summary>
/// Formats and writes the query region (Output of Merge/Subtract/Intersect) operations
/// into a new worksheet appended to the workbook. When the result group has no
/// group IDs, nothing is written and an informational message box is shown instead.
/// </summary>
/// <param name="resultWorkbook">
/// Workbook to which Range has to be written
/// </param>
/// <param name="resultSheetname">New worksheet name</param>
/// <param name="resultGroup">Output group</param>
/// <param name="groupsData">
/// Complete input groups information
/// Contains individual Group, sheet and addresses of ISequenceRange
/// </param>
/// <param name="showMetadata">
/// When true, one extra cell per entry of rangeHeaders from index 3 onwards is written
/// for each result range, filled by ExtractRangeMetadata.
/// </param>
/// <param name="showBasePairCount">
/// When true, summed range lengths (End - Start) are written next to the range counts,
/// and the last column receives the length shared by the reference and query groups.
/// </param>
private void WriteSequenceRange(
    Workbook resultWorkbook,
    string resultSheetname,
    SequenceRangeGrouping resultGroup,
    Dictionary<SequenceRangeGrouping, GroupData> groupsData,
    bool showMetadata,
    bool showBasePairCount)
{
    if (resultGroup.GroupIDs.Count() > 0)
    {
        int baseRowIndex = 2, baseColumnIndex = 2;
        int dataRowIndex = 0, dataColumnIndex = 0;
        int totalColumnCount = 0;
        object[,] values;
        List<string> hyperlinks = null;
        List<ISequenceRange> resultSequenceRanges = null;
        Dictionary<SequenceRangeGrouping, Dictionary<string, int>> groupSheetIndices = null;
        Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>> columnData = null;
        Dictionary<ISequenceRange, string> rangedata = null;
        Dictionary<SequenceRangeGrouping, SequenceRangeGrouping> allSheetData = null;
        Dictionary<SequenceRangeGrouping, int> allSheetCount = null;
        SequenceRangeGrouping groupToMerge = null;
        SequenceRangeGrouping referenceGroup = null;
        SequenceRangeGrouping queryGroup = null;
        SequenceRangeGrouping sheetGroup = null;
        int sheetCount = 0;
        Range activeRange = null;

        // Append a new worksheet after the last one, activate it and name it.
        var resultWorksheet =
            resultWorkbook.Worksheets.Add(
                Type.Missing,
                resultWorkbook.Worksheets.get_Item(resultWorkbook.Worksheets.Count),
                Type.Missing,
                Type.Missing) as Worksheet;
        ((_Worksheet)resultWorksheet).Activate();
        Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
        resultWorksheet.Name = resultSheetname;
        activeRange = resultWorksheet.get_Range(GetColumnString(baseColumnIndex) + baseRowIndex, Type.Missing);

        // Flatten the per-group metadata into a single range -> string lookup.
        // The string is later split on ',' to obtain hyperlink targets.
        rangedata =
            groupsData.Values.Select(gd => gd.Metadata) // Get the Metadata
                .SelectMany(sd => sd.Values).ToList() // Get the Dictionary
                .SelectMany(rd => rd).ToList().ToDictionary(k => k.Key, v => v.Value); // Convert to dictionary

        // Write the header; it fills groupSheetIndices and totalColumnCount and
        // returns the row at which data starts.
        groupSheetIndices = new Dictionary<SequenceRangeGrouping, Dictionary<string, int>>();
        baseRowIndex = this.WriteSequenceRangeHeader(
            resultWorksheet,
            groupSheetIndices,
            groupsData,
            baseRowIndex,
            baseColumnIndex,
            ref totalColumnCount,
            showMetadata,
            showBasePairCount);

        // Make the column count relative to the first data column.
        totalColumnCount -= (baseColumnIndex - 1);

        foreach (string resultGroupKey in resultGroup.GroupIDs)
        {
            // One block of rows per result group: values are collected in a 2D
            // buffer and assigned to the sheet in a single call after the inner loop.
            resultSequenceRanges = resultGroup.GetGroup(resultGroupKey);
            dataRowIndex = 0;
            values = new object[resultSequenceRanges.Count, totalColumnCount];
            activeRange = resultWorksheet.get_Range(
                GetColumnString(baseColumnIndex) + baseRowIndex,
                Missing.Value);
            activeRange = activeRange.get_Resize(resultSequenceRanges.Count, totalColumnCount);

            foreach (ISequenceRange resultSequenceRange in resultSequenceRanges)
            {
                // Per-row accumulators.
                referenceGroup = null;
                queryGroup = null;
                dataColumnIndex = 0;
                allSheetData = new Dictionary<SequenceRangeGrouping, SequenceRangeGrouping>();
                allSheetCount = new Dictionary<SequenceRangeGrouping, int>();

                // First three cells: ID, Start, End of the result range.
                values[dataRowIndex, dataColumnIndex] = resultSequenceRange.ID;
                dataColumnIndex++;
                values[dataRowIndex, dataColumnIndex] = resultSequenceRange.Start;
                dataColumnIndex++;
                values[dataRowIndex, dataColumnIndex] = resultSequenceRange.End;
                dataColumnIndex++;

                if (showMetadata)
                {
                    // Headers 0..2 are skipped here; the three cells above already cover them.
                    for (int index = 3; index < rangeHeaders.Count; index++)
                    {
                        values[dataRowIndex, dataColumnIndex] = ExtractRangeMetadata(
                            resultSequenceRange,
                            rangeHeaders[index]);
                        dataColumnIndex++;
                    }
                }

                // Key: column index into 'values'. Value: (owning grouping,
                // reference/query flag, ranges belonging to that column).
                columnData = PrepareSequenceRowRange(
                    groupsData,
                    groupSheetIndices,
                    rangedata,
                    resultSequenceRange);

                foreach (var columnGroup in columnData)
                {
                    if (showBasePairCount)
                    {
                        // Get the parent ranges for Group's range in a column;
                        // merging is only needed when there is more than one range.
                        groupToMerge = new SequenceRangeGrouping(columnGroup.Value.Item3);
                        if (1 < columnGroup.Value.Item3.Count)
                        {
                            groupToMerge = groupToMerge.MergeOverlaps(0, false);
                        }

                        // Render data for Group's range in a column: summed length, then count.
                        values[dataRowIndex, columnGroup.Key] = groupToMerge.GroupRanges.Sum(sr => sr.End - sr.Start);
                        values[dataRowIndex, columnGroup.Key + 1] = columnGroup.Value.Item3.Count;
                    }
                    else
                    {
                        values[dataRowIndex, columnGroup.Key] = columnGroup.Value.Item3.Count;
                    }

                    // Collect the hyperlink targets of every range in this column.
                    hyperlinks = new List<string>();
                    foreach (ISequenceRange range in columnGroup.Value.Item3)
                    {
                        hyperlinks.AddRange(rangedata[range].Split(','));
                    }

                    this.ShowHyperlink(
                        hyperlinks,
                        activeRange,
                        columnGroup.Key,
                        dataRowIndex,
                        showBasePairCount);

                    if (showBasePairCount)
                    {
                        // Calculate data for all group: accumulate the merged ranges per owning grouping.
                        if (allSheetData.TryGetValue(columnGroup.Value.Item1, out sheetGroup))
                        {
                            allSheetData[columnGroup.Value.Item1] = sheetGroup.MergeOverlaps(
                                groupToMerge,
                                0,
                                false);
                        }
                        else
                        {
                            allSheetData[columnGroup.Value.Item1] = groupToMerge;
                        }

                        // Build up reference & query groups (later get common range using this).
                        // Item2 == true feeds the reference side, false the query side.
                        if (columnGroup.Value.Item2)
                        {
                            if (null == referenceGroup)
                            {
                                referenceGroup = groupToMerge;
                            }
                            else
                            {
                                referenceGroup = referenceGroup.MergeOverlaps(groupToMerge, 0, false);
                            }
                        }
                        else
                        {
                            if (null == queryGroup)
                            {
                                queryGroup = groupToMerge;
                            }
                            else
                            {
                                queryGroup = queryGroup.MergeOverlaps(groupToMerge, 0, false);
                            }
                        }
                    }

                    // Calculate range count for all group
                    if (allSheetCount.TryGetValue(columnGroup.Value.Item1, out sheetCount))
                    {
                        allSheetCount[columnGroup.Value.Item1] += columnGroup.Value.Item3.Count;
                    }
                    else
                    {
                        allSheetCount[columnGroup.Value.Item1] = columnGroup.Value.Item3.Count;
                    }
                }

                // Render all columns in SequenceRangeGrouping: the per-grouping totals are
                // placed immediately before that grouping's lowest column index
                // (two cells when base pair counts are shown, one otherwise).
                foreach (var allData in allSheetCount)
                {
                    dataColumnIndex = groupSheetIndices[allData.Key].Values.Min() - (showBasePairCount ? 2 : 1);
                    if (showBasePairCount)
                    {
                        values[dataRowIndex, dataColumnIndex] = allSheetData[allData.Key].GroupRanges.Sum(sr => sr.End - sr.Start);
                        dataColumnIndex++;
                    }

                    values[dataRowIndex, dataColumnIndex] = allData.Value;
                }

                if (showBasePairCount)
                {
                    // Render common column in SequenceRangeGrouping: length of the
                    // reference/query intersection, or 0 when either side is missing.
                    if (null != referenceGroup && null != queryGroup)
                    {
                        referenceGroup = referenceGroup.Intersect(
                            queryGroup,
                            0,
                            IntersectOutputType.OverlappingPiecesOfIntervals,
                            false);
                        values[dataRowIndex, totalColumnCount - 1] = referenceGroup.GroupRanges.Sum(sr => sr.End - sr.Start);
                    }
                    else
                    {
                        values[dataRowIndex, totalColumnCount - 1] = 0;
                    }
                }

                dataRowIndex++;
            }

            // Push the whole block to the sheet at once, then move below it.
            activeRange.set_Value(Missing.Value, values);
            baseRowIndex += dataRowIndex;
        }

        resultWorksheet.Columns.AutoFit();
        this.NormalizeColumWidths(resultWorksheet.UsedRange);
        this.EnableAllControls();
    }
    else
    {
        // Nothing to write: tell the user there is no result.
        MessageBox.Show(
            Resources.NO_RESULT,
            Resources.CAPTION,
            MessageBoxButtons.OK,
            MessageBoxIcon.Information);
    }
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance   Ranges in the query
///      1 to  4                   2 to  6
///      4 to  8                   3 to  6
///      8 to 12                   9 to 14
///     25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///     25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///      1 to  2
///      6 to  8
///      8 to  9
///     25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Right-hand remainders already emitted for the current ref range; a later
    // query range may still have to trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Most recently created result range. Kept across inner iterations so that
    // parents of later query ranges can be attached to it.
    SequenceRange range = null;

    // Merge the query sequence ranges. When parents are requested, the un-merged
    // query ranges are captured first so that a merged query range can later be
    // told apart from one that came through unchanged.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing overlaps: the ref range survives whole, except when
                    // IntervalsWithNoOverlap is combined with a non-positive minOverlap.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // No need to proceed if only non overlapping intervals are needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // In case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // If the previous overlapping range's start and end are equal then
                            // there is no need to change the previously emitted pieces.
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Walk backwards because entries may be removed. Pieces reaching
                                // past this query range's start are cut there (these are the same
                                // objects already added to the result); pieces ending before it
                                // are finished and dropped from the working list.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Left-hand remainder: from the ref start up to the query start.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);
                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // Right-hand remainder: from the query end up to the ref end.
                            // Remembered so that the next query range can trim it.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has nothing for this group: every ref range is copied as is.
            // Iterate as ISequenceRange, like the branch above; a foreach typed as
            // SequenceRange would insert a cast per element and throw
            // InvalidCastException for any other ISequenceRange implementation.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);
                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Merges the sequence ranges of the query grouping with the ranges of this grouping.
///
/// For example,
///
///  Ranges in this instance   Ranges in the query
///      3 to 15                   4 to 10
///      5 to 18                  11 to 20
///
/// Result for minOverlap set to 1
///      3 to 20
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query sequence ranges.</param>
/// <param name="minOverlap">Minimum number of base pairs that must overlap.</param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The merged sequence range grouping.</returns>
public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
{
    if (null == query)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
    }

    // Pool the ranges of both groupings (this instance first, then the query)
    // and let the single-grouping overload perform the actual merge.
    List<ISequenceRange> pooledRanges = new List<ISequenceRange>();
    pooledRanges.AddRange(this.Flatten());
    pooledRanges.AddRange(query.Flatten());

    return new SequenceRangeGrouping(pooledRanges).MergeOverlaps(minOverlap, isParentSeqRangesRequired);
}
/// <summary>
/// Merges two sequence range groupings with default parameters, by calling
/// MergeOverlaps on the reference grouping with the query grouping as argument.
/// </summary>
/// <param name="referenceSequence">Reference sequence for merging.</param>
/// <param name="querySequence">Query sequence for merging. Validation of this
/// argument is left to MergeOverlaps.</param>
/// <returns>SequenceRangeGrouping with merged output.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="referenceSequence"/> is null.</exception>
public static SequenceRangeGrouping DoBEDMerge(SequenceRangeGrouping referenceSequence, SequenceRangeGrouping querySequence)
{
    // Fail with a descriptive argument error instead of a NullReferenceException
    // raised by the member call below.
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    return referenceSequence.MergeOverlaps(querySequence);
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance   Ranges in the query
///      1 to  4                   2 to  6
///      4 to  8                   3 to  6
///      8 to 12                   9 to 14
///     25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///     25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///      1 to  2
///      6 to  8
///      8 to  9
///     25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Right-hand remainders already emitted for the current ref range; a later
    // query range may still have to trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Most recently created result range. Kept across inner iterations so that
    // parents of later query ranges can be attached to it.
    SequenceRange range = null;

    // Merge the query sequence ranges. When parents are requested, the un-merged
    // query ranges are captured first so that a merged query range can later be
    // told apart from one that came through unchanged.
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // Nothing overlaps: the ref range survives whole, except when
                    // IntervalsWithNoOverlap is combined with a non-positive minOverlap.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // No need to proceed if only non overlapping intervals are needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // In case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // If the previous overlapping range's start and end are equal then
                            // there is no need to change the previously emitted pieces.
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Walk backwards because entries may be removed. Pieces reaching
                                // past this query range's start are cut there (these are the same
                                // objects already added to the result); pieces ending before it
                                // are finished and dropped from the working list.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            if (previousOverlappingRange == null
                                || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                            {
                                // Left-hand remainder: from the ref start up to the query start.
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);
                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null
                            || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                        {
                            // Right-hand remainder: from the query end up to the ref end.
                            // Remembered so that the next query range can trim it.
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);
                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has nothing for this group: every ref range is copied as is.
            // Iterate as ISequenceRange, like the branch above; a foreach typed as
            // SequenceRange would insert a cast per element and throw
            // InvalidCastException for any other ISequenceRange implementation.
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);
                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Builds a SequenceRangeGroupingMetrics for the given grouping and returns
/// its <c>bases</c> value.
/// </summary>
/// <param name="srg">Grouping to measure.</param>
/// <returns>The <c>bases</c> value reported by the metrics object.</returns>
public static long SequenceRangeGroupingCBases(SequenceRangeGrouping srg)
{
    return new SequenceRangeGroupingMetrics(srg).bases;
}