コード例 #1
0
ファイル: GroupData.cs プロジェクト: cpatmoore/bio
 /// <summary>
 /// Creates a GroupData instance that bundles a sequence range grouping
 /// together with its name and its metadata.
 /// </summary>
 /// <param name="group">Grouping stored in <c>Group</c>.</param>
 /// <param name="name">Name stored in <c>Name</c>.</param>
 /// <param name="metadata">Metadata stored in the <c>_metadata</c> field: an outer string key
 /// mapping to a dictionary from ISequenceRange to string.</param>
 public GroupData(SequenceRangeGrouping group, string name, Dictionary<string, Dictionary<ISequenceRange, string>> metadata)
 {
     // The arguments are stored as given; no copies are made and nothing is validated.
     this.Group = group;
     this.Name = name;
     this._metadata = metadata;
 }
コード例 #2
0
        /// <summary>
        /// Writes out a grouping of ISequenceRange objects to the file at the
        /// specified path. The file is opened with File.Create and handed to the
        /// formatter's stream-based Format overload.
        /// </summary>
        /// <param name="formatter">Formatter used to write the grouping.</param>
        /// <param name="rangeGroup">The range grouping to write.</param>
        /// <param name="filename">Path of the file to create.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="formatter"/> is null, or when
        /// <paramref name="filename"/> is null or empty.
        /// </exception>
        public static void Format(this ISequenceRangeFormatter formatter, SequenceRangeGrouping rangeGroup, string filename)
        {
            // Validate everything before touching the file system: File.Create would
            // otherwise create (or truncate) the target file and only then fail on
            // the null formatter, leaving an empty file behind.
            if (formatter == null)
            {
                throw new ArgumentNullException("formatter");
            }

            if (string.IsNullOrEmpty(filename))
            {
                throw new ArgumentNullException("filename");
            }

            // The using block closes the stream even if the formatter throws.
            using (FileStream fs = File.Create(filename))
            {
                formatter.Format(fs, rangeGroup);
            }
        }
コード例 #3
0
        /// <summary>
        /// Writes out a grouping of ISequenceRange objects to the file at the
        /// specified path. The file is opened with File.Create and handed to the
        /// formatter's stream-based Format overload.
        /// </summary>
        /// <param name="formatter">Formatter used to write the grouping.</param>
        /// <param name="rangeGroup">The range grouping to write.</param>
        /// <param name="filename">Path of the file to create.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="formatter"/> is null, or when
        /// <paramref name="filename"/> is null or empty.
        /// </exception>
        public static void Format(this ISequenceRangeFormatter formatter, SequenceRangeGrouping rangeGroup, string filename)
        {
            // Reject a null formatter up front. Without this check the file would be
            // created (or truncated) by File.Create before the call on the null
            // formatter fails.
            if (formatter == null)
            {
                throw new ArgumentNullException("formatter");
            }

            if (string.IsNullOrEmpty(filename))
            {
                throw new ArgumentNullException("filename");
            }

            // The using block closes the stream even if the formatter throws.
            using (FileStream fs = File.Create(filename))
            {
                formatter.Format(fs, rangeGroup);
            }
        }
コード例 #4
0
        /// <summary>
        /// Merges the sequence ranges of the query grouping with the sequence ranges
        /// of this grouping.
        ///
        /// For example,
        ///
        ///  Ranges in this instance   Ranges in the query
        ///    3 to  15                   4 to 10
        ///    5 to  18                  11 to 20
        ///
        ///  Result for minOverlap set to 1
        ///   3 to 20
        ///
        /// The flattened ranges of both groupings are pooled into one temporary
        /// grouping, and the result of that grouping's own MergeOverlaps is returned.
        /// </summary>
        /// <param name="query">Query sequence ranges.</param>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which a new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The merged sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
            }

            // Pool this grouping's ranges first, then the query's ranges.
            List<ISequenceRange> pooledRanges = new List<ISequenceRange>(this.Flatten());
            pooledRanges.AddRange(query.Flatten());

            // Both parameters are forwarded unchanged to the single-grouping overload.
            SequenceRangeGrouping pooledGrouping = new SequenceRangeGrouping(pooledRanges);
            return pooledGrouping.MergeOverlaps(minOverlap, isParentSeqRangesRequired);
        }
コード例 #5
0
        /// <summary>
        /// Resets the <c>groups</c>, <c>ranges</c> and <c>bases</c> counters and
        /// recomputes them from the given grouping: one group per group ID, the
        /// number of ranges in every group, and the sum of the Length of every range.
        /// </summary>
        /// <param name="srg">Grouping whose group IDs and groups are enumerated.</param>
        public void ComputeSequenceRangeGroupingMetrics(SequenceRangeGrouping srg)
        {
            groups = 0L;
            ranges = 0L;
            bases = 0L;

            foreach (string id in srg.GroupIDs)
            {
                // Fetch the group once per ID instead of once for the count and
                // a second time for the enumeration.
                var rangesInGroup = srg.GetGroup(id);

                ++groups;
                ranges += rangesInGroup.Count;

                // The loop variable type makes foreach cast every element to
                // SequenceRange, exactly as before.
                foreach (SequenceRange sr in rangesInGroup)
                {
                    bases += sr.Length;
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// For each group in the grouping, this method traverses through each range
        /// in the group and normalizes the ranges down to the minimal spanning set
        /// required to still show the same range spans.
        ///
        /// For instance if you had in group 'Chr1' the following ranges:
        ///
        /// -> 10 to 100
        /// -> 200 to 250
        /// -> 35 to 45
        /// -> 90 to 150
        ///
        /// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
        /// For minOverlap = 0
        ///
        /// -> 10 to 150
        /// -> 200 to 250
        ///
        /// for minOverlap = -50
        ///
        /// -> 10 to 250
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap. The next sorted
        /// range is merged into the current one while (current End - next Start) is greater than or
        /// equal to this value, so a negative value also merges ranges separated by a gap.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The overlapped sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

            foreach (List<ISequenceRange> rangeList in this.groups.Values)
            {
                // Sort a copy so the group's own list is left untouched.
                List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
                sortedRanges.Sort();

                // Walk the sorted list with a cursor. Removing the head element with
                // RemoveAt(0) shifts every remaining element and made the merge
                // quadratic in the size of the group.
                int next = 0;
                while (next < sortedRanges.Count)
                {
                    ISequenceRange first = sortedRanges[next];
                    next++;

                    ISequenceRange seqRange = new SequenceRange(first.ID, first.Start, first.End);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(seqRange, first);
                    }

                    // The merged range is added before it is extended, as before.
                    seqRangeGroup.Add(seqRange);

                    // Absorb every following range that overlaps the merged range
                    // by at least minOverlap.
                    while (next < sortedRanges.Count && (seqRange.End - sortedRanges[next].Start) >= minOverlap)
                    {
                        ISequenceRange absorbed = sortedRanges[next];
                        next++;

                        seqRange.End = Math.Max(seqRange.End, absorbed.End);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(seqRange, absorbed);
                        }
                    }
                }
            }

            return seqRangeGroup;
        }
コード例 #7
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
        // Builds a NodeXL Venn diagram workbook for two sequence range groupings.
        // The groupings are split into the regions only in A, only in B and in both,
        // the base counts of those regions feed the diagram, and the source and
        // overlap data are written to the workbook that is returned.
        CreateNodeXLVennDiagramWorkbookFromSequenceRangeGroupings
            (
            Microsoft.Office.Interop.Excel.Application oApplication,
            SequenceRangeGrouping srgA,
            SequenceRangeGrouping srgB
            )
        {
            // create the proper sets for VennDiagram
            SequenceRangeGrouping srgOnly_A, srgOnly_B, srgOnly_AB;
            CreateSequenceRangeGroupingsForVennDiagram(srgA, srgB, out srgOnly_A, out srgOnly_B, out srgOnly_AB);

            // compute the metrics of each region; only the base counts are used below
            SequenceRangeGroupingMetrics srgmOnly_A = new SequenceRangeGroupingMetrics(srgOnly_A);
            SequenceRangeGroupingMetrics srgmOnly_B = new SequenceRangeGroupingMetrics(srgOnly_B);
            SequenceRangeGroupingMetrics srgmOnly_AB = new SequenceRangeGroupingMetrics(srgOnly_AB);

            // the diagram data is built from the three base counts (A only, B only, A and B)
            VennDiagramData vdd = new VennDiagramData(srgmOnly_A.bases
                , srgmOnly_B.bases
                , srgmOnly_AB.bases);

            Workbook oWorkbook = CreateNodeXLVennDiagramWorkbook(oApplication, vdd);

            // write source data to workbook
            DisplaySourceData(srgA, Resources.A, oWorkbook);
            DisplaySourceData(srgB, Resources.B, oWorkbook);

            // Write overlap data to a sheet
            // One worksheet is added; the current last sheet is passed as the second
            // argument (the "After" position of Sheets.Add).
            Worksheet outputSheet = oWorkbook.Sheets.Add(Type.Missing, oWorkbook.Sheets[oWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
            outputSheet.Name = Resources.OverlapsSheetName;
            WriteOverlapData(outputSheet, srgOnly_A, srgOnly_B, srgOnly_AB);

            // the application is made visible only after all data has been written
            oApplication.Visible = true;
            return oWorkbook;
        }
コード例 #8
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
        // Builds a NodeXL Venn diagram workbook for three sequence range groupings.
        // The groupings are split into the seven Venn regions, the base counts of
        // those regions feed the diagram, and the source and overlap data are
        // written to the workbook that is returned.
        CreateNodeXLVennDiagramWorkbookFromSequenceRangeGroupings
            (
            Microsoft.Office.Interop.Excel.Application oApplication,
            SequenceRangeGrouping srgA,
            SequenceRangeGrouping srgB,
            SequenceRangeGrouping srgC
            )
        {
            // create the seven region groupings (A, B, C, AB, AC, BC, ABC)
            SequenceRangeGrouping srgOnly_A, srgOnly_B, srgOnly_C, srgOnly_AB, srgOnly_AC, srgOnly_BC, srgOnly_ABC;
            CreateSequenceRangeGroupingsForVennDiagram(srgA
                , srgB
                , srgC
                , out srgOnly_A
                , out srgOnly_B
                , out srgOnly_C
                , out srgOnly_AB
                , out srgOnly_AC
                , out srgOnly_BC
                , out srgOnly_ABC);

            // generate the intersection Venn metrics
            SequenceRangeGroupingMetrics srgmOnly_A = new SequenceRangeGroupingMetrics(srgOnly_A);
            SequenceRangeGroupingMetrics srgmOnly_B = new SequenceRangeGroupingMetrics(srgOnly_B);
            SequenceRangeGroupingMetrics srgmOnly_C = new SequenceRangeGroupingMetrics(srgOnly_C);
            SequenceRangeGroupingMetrics srgmOnly_AB = new SequenceRangeGroupingMetrics(srgOnly_AB);
            SequenceRangeGroupingMetrics srgmOnly_AC = new SequenceRangeGroupingMetrics(srgOnly_AC);
            SequenceRangeGroupingMetrics srgmOnly_BC = new SequenceRangeGroupingMetrics(srgOnly_BC);
            SequenceRangeGroupingMetrics srgmOnly_ABC = new SequenceRangeGroupingMetrics(srgOnly_ABC);

            // create the NodeXL Venn diagram data from the seven base counts
            VennDiagramData vdd = new VennDiagramData(srgmOnly_A.bases
                , srgmOnly_B.bases
                , srgmOnly_C.bases
                , srgmOnly_AB.bases
                , srgmOnly_AC.bases
                , srgmOnly_BC.bases
                , srgmOnly_ABC.bases);

            // To ensure NodeXL displays the diagram, do NOT make the application
            // visible or update the screen until the parameters are all set up.
            oApplication.ScreenUpdating = false;
            Workbook oWorkbook = CreateNodeXLVennDiagramWorkbook(oApplication, vdd);

            // write source data to workbook
            DisplaySourceData(srgA, Resources.A, oWorkbook);
            DisplaySourceData(srgB, Resources.B, oWorkbook);
            DisplaySourceData(srgC, Resources.C, oWorkbook);

            // Write overlap data to a sheet
            // One worksheet is added; the current last sheet is passed as the second
            // argument (the "After" position of Sheets.Add).
            Worksheet outputSheet = oWorkbook.Sheets.Add(Type.Missing, oWorkbook.Sheets[oWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
            outputSheet.Name = Resources.OverlapsSheetName;
            WriteOverlapData(outputSheet, srgOnly_A, srgOnly_B, srgOnly_C, srgOnly_AB, srgOnly_AC, srgOnly_BC, srgOnly_ABC);

            // everything is set up: re-enable screen updating, then show the application
            // NOTE(review): ScreenUpdating is not restored if any call above throws — confirm whether that matters.
            oApplication.ScreenUpdating = true;
            oApplication.Visible = true;
            return oWorkbook;
        }
コード例 #9
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
 CreateSequenceRangeGroupingsForVennDiagram
     (
     SequenceRangeGrouping srgA,
     SequenceRangeGrouping srgB,
     out SequenceRangeGrouping srgOnly_A,
     out SequenceRangeGrouping srgOnly_B,
     out SequenceRangeGrouping srgOnly_AB
     )
 {
     // Splits two groupings into the three regions of a two-circle Venn diagram.
     // Start from the merge of both inputs (their union, in set terms).
     SequenceRangeGrouping unionOfAAndB = srgA.MergeOverlaps(srgB);

     // Only in A: the union with everything from B removed.
     // TODO: Subtract and Intersect should use the same 'logic' for their third argument.
     srgOnly_A = unionOfAAndB.Subtract(
         srgB,
         1,
         SubtractOutputType.NonOverlappingPiecesOfIntervals);

     // Only in B: the union with everything from A removed.
     srgOnly_B = unionOfAAndB.Subtract(
         srgA,
         1,
         SubtractOutputType.NonOverlappingPiecesOfIntervals);

     // In both: the overlapping pieces of A and B.
     srgOnly_AB = srgA.Intersect(
         srgB,
         1,
         IntersectOutputType.OverlappingPiecesOfIntervals);
 }
コード例 #10
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
        CreateSequenceRangeGroupingsForVennDiagram
            (
            SequenceRangeGrouping srgA,
            SequenceRangeGrouping srgB,
            SequenceRangeGrouping srgC,
            out SequenceRangeGrouping srgOnly_A,
            out SequenceRangeGrouping srgOnly_B,
            out SequenceRangeGrouping srgOnly_C,
            out SequenceRangeGrouping srgOnly_AB,
            out SequenceRangeGrouping srgOnly_AC,
            out SequenceRangeGrouping srgOnly_BC,
            out SequenceRangeGrouping srgOnly_ABC
            )
        {
            // Splits three groupings into the seven regions of a three-circle Venn diagram.

            // Regions covered by exactly one input: the input minus the merge of the other two.
            SequenceRangeGrouping unionOfBAndC = srgB.MergeOverlaps(srgC);
            srgOnly_A = srgA.Subtract(unionOfBAndC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            SequenceRangeGrouping unionOfAAndC = srgA.MergeOverlaps(srgC);
            srgOnly_B = srgB.Subtract(unionOfAAndC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            SequenceRangeGrouping unionOfAAndB = srgA.MergeOverlaps(srgB);
            srgOnly_C = srgC.Subtract(unionOfAAndB, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            // Regions covered by exactly two inputs: the pairwise intersection minus the third input.
            SequenceRangeGrouping commonToAAndB = srgA.Intersect(srgB, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
            srgOnly_AB = commonToAAndB.Subtract(srgC, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            SequenceRangeGrouping commonToAAndC = srgA.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
            srgOnly_AC = commonToAAndC.Subtract(srgB, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            SequenceRangeGrouping commonToBAndC = srgB.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
            srgOnly_BC = commonToBAndC.Subtract(srgA, 1, SubtractOutputType.NonOverlappingPiecesOfIntervals);

            // Region covered by all three inputs: the A/B intersection intersected with C.
            srgOnly_ABC = commonToAAndB.Intersect(srgC, 1, IntersectOutputType.OverlappingPiecesOfIntervals);
        }
コード例 #11
0
        /// <summary>
        /// For each group in the grouping, this method traverses through each range
        /// in the group and normalizes the ranges down to the minimal spanning set
        /// required to still show the same range spans.
        ///
        /// For instance if you had in group 'Chr1' the following ranges:
        ///
        /// -> 10 to 100
        /// -> 200 to 250
        /// -> 35 to 45
        /// -> 90 to 150
        ///
        /// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
        /// For minOverlap = 0
        ///
        /// -> 10 to 150
        /// -> 200 to 250
        ///
        /// for minOverlap = -50
        ///
        /// -> 10 to 250
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap. The next sorted
        /// range is merged into the current one while (current End - next Start) is greater than or
        /// equal to this value, so a negative value also merges ranges separated by a gap.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The overlapped sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();

            foreach (List<ISequenceRange> rangeList in this.groups.Values)
            {
                // Sort a copy so the group's own list is left untouched.
                List<ISequenceRange> sortedRanges = new List<ISequenceRange>(rangeList);
                sortedRanges.Sort();

                // Advance a cursor through the sorted list instead of repeatedly
                // calling RemoveAt(0), which shifts all remaining elements on every
                // call and made the merge quadratic in the size of the group.
                int cursor = 0;
                while (cursor < sortedRanges.Count)
                {
                    ISequenceRange head = sortedRanges[cursor];
                    cursor++;

                    ISequenceRange seqRange = new SequenceRange(head.ID, head.Start, head.End);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(seqRange, head);
                    }

                    // The merged range is added before it is extended, as before.
                    seqRangeGroup.Add(seqRange);

                    // Absorb every following range that overlaps the merged range
                    // by at least minOverlap.
                    while (cursor < sortedRanges.Count && (seqRange.End - sortedRanges[cursor].Start) >= minOverlap)
                    {
                        ISequenceRange candidate = sortedRanges[cursor];
                        cursor++;

                        seqRange.End = Math.Max(seqRange.End, candidate.End);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(seqRange, candidate);
                        }
                    }
                }
            }

            return seqRangeGroup;
        }
コード例 #12
0
ファイル: InputSelection.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Method called when the user clicks Ok button on InputSelectionDialog.
        /// For every sequence item of the dialog it either reuses the GroupData found in
        /// SequenceCache or parses the item's Excel ranges into a new SequenceRangeGrouping
        /// (and caches it), then raises inputSequenceRangeSelectionComplete with the result
        /// and closes the dialog.
        /// In case there was an error parsing, the item is flagged with an error status, the
        /// error message is shown and the input selection dialog is shown again.
        /// </summary>
        /// <param name="dialog">InputSequenceDialog object which raised this event</param>
        private void OnInputSequenceRangeDialogSubmit(ISelectionDialog dialog)
        {
            // NOTE(review): 'as' yields null when dialog is not an InputSelectionDialog; the
            // first use inside the try would then throw, and the catch handler below uses
            // selectionDialog again. Confirm callers always pass an InputSelectionDialog.
            InputSelectionDialog selectionDialog = dialog as InputSelectionDialog;
            GroupData cachedData = null;

            // maps sheet to its column-mapping
            Dictionary<string, Dictionary<int, string>> columnMappedSheets =
                    new Dictionary<string, Dictionary<int, string>>();

            // Goes in the cache and is the output of this method as well.
            Dictionary<SequenceRangeGrouping, GroupData> groupsData =
                    new Dictionary<SequenceRangeGrouping, GroupData>();
            List<SequenceRangeGrouping> parsedSequences = new List<SequenceRangeGrouping>();

            SequenceRangeGrouping sequenceRangeGroup = null;
            Dictionary<string, Dictionary<ISequenceRange, string>> sheetData = null;
            Dictionary<ISequenceRange, string> rangeData = null;
            List<ISequenceRange> sequenceRanges = null;

            List<Range> rangesInCurrentSequenceItem;

            // Regular expression to read the sheet name from address:
            // captures everything from the start of the string up to the first '!'.
            Regex regexSheetname = new Regex(@"(?<Sheetname>^.[^!]*)", RegexOptions.IgnoreCase);
            Match matchSheetname = null;
            string sheetName = string.Empty;

            try
            {
                foreach (InputSequenceItem currentSequenceItem in selectionDialog.GetSequences())
                {
                    try
                    {
                        rangesInCurrentSequenceItem = GetRanges(currentSequenceItem.SequenceAddress);
                        // get from cache
                        cachedData = SequenceCache.TryGetSequence(rangesInCurrentSequenceItem, selectionDialog.InputParamsAsKey) as GroupData;
                        if (cachedData != null)
                        {
                            // got from cache
                            cachedData.Name = currentSequenceItem.SequenceName; // Set ID

                            // An item with "use metadata" selected goes to the front of the
                            // list; every other item is appended.
                            if (currentSequenceItem.IsUseMetadataSelected)
                            {
                                parsedSequences.Insert(0, cachedData.Group);
                            }
                            else
                            {
                                parsedSequences.Add(cachedData.Group);
                            }

                            // The same cached grouping may already have been registered by an
                            // earlier item, so only add it once.
                            if (!groupsData.ContainsKey(cachedData.Group))
                            {
                                groupsData.Add(cachedData.Group, cachedData);
                            }
                        }
                        else
                        {
                            // parse it as its not in cache
                            sheetData = new Dictionary<string, Dictionary<ISequenceRange, string>>();
                            sequenceRanges = new List<ISequenceRange>();
                            foreach (Range currentRange in rangesInCurrentSequenceItem)
                            {
                                // Only assigned by GetMappingForRange below, i.e. only when the
                                // mapping key is seen for the first time in this call.
                                // NOTE(review): for a later range with an already-mapped key this
                                // stays false, so no header row is stripped — confirm intended.
                                bool firstRowIsHeader = false;

                                // See if the sheet in which this range is, has a column mapping
                                if (!columnMappedSheets.ContainsKey(GetMappingKey(currentRange)))
                                {
                                    // Bring the range into view before asking for its mapping.
                                    (currentRange.Worksheet as _Worksheet).Activate();
                                    currentRange.Select();
                                    Dictionary<int, string> mapping = GetMappingForRange(currentRange, out firstRowIsHeader);
                                    if (mapping == null)
                                    {
                                        // Could not get a proper mapping. So redirect to previous window.
                                        selectionDialog.ShowDialog();
                                        return;
                                    }

                                    if (firstRowIsHeader)
                                    {
                                        UpdateColumnHeaders(currentRange, mapping);
                                    }

                                    columnMappedSheets.Add(GetMappingKey(currentRange), mapping);
                                }

                                // If range has a header, remove first row from it before sending it for parsing.
                                Range rangeToParse;
                                if (firstRowIsHeader)
                                {
                                    if (currentRange.Rows.Count == 1) // only one row which is marked as header, throw error
                                    {
                                        throw new InvalidOperationException(Resources.SelectionModel_ParsingFailed);
                                    }

                                    // Shift down by one row, then shrink by one row so the range
                                    // still ends on the original last row.
                                    rangeToParse = currentRange.get_Offset(1, 0);
                                    rangeToParse = rangeToParse.get_Resize(currentRange.Rows.Count - 1, currentRange.Columns.Count);
                                }
                                else
                                {
                                    rangeToParse = currentRange;
                                }

                                // Parse the range with the column mapping stored for its key.
                                Dictionary<ISequenceRange, string> srCollection =
                                        ExcelSelectionParser.RangeToSequenceRange(
                                            rangeToParse,
                                            columnMappedSheets[GetMappingKey(currentRange)]);

                                // Group the parsed ranges by the sheet name read from each value.
                                // Entries whose value does not match the regex are dropped.
                                foreach (KeyValuePair<ISequenceRange, string> sr in srCollection)
                                {
                                    matchSheetname = regexSheetname.Match(sr.Value);
                                    if (matchSheetname.Success)
                                    {
                                        sheetName = matchSheetname.Groups["Sheetname"].Value;
                                        if (sheetData.TryGetValue(sheetName, out rangeData))
                                        {
                                            rangeData.Add(sr.Key, sr.Value);
                                        }
                                        else
                                        {
                                            rangeData = new Dictionary<ISequenceRange, string>();
                                            sheetData.Add(sheetName, rangeData);
                                            rangeData.Add(sr.Key, sr.Value);
                                        }

                                        sequenceRanges.Add(sr.Key);
                                    }
                                }
                            }

                            // Wrap the parsed ranges and store them in the cache under the same
                            // key that was used for the lookup above.
                            sequenceRangeGroup = new SequenceRangeGrouping(sequenceRanges);
                            cachedData = new GroupData(sequenceRangeGroup,
                                    currentSequenceItem.SequenceName,
                                    sheetData);
                            SequenceCache.Add(rangesInCurrentSequenceItem, cachedData, selectionDialog.InputParamsAsKey);

                            // Same ordering rule as in the cache-hit branch.
                            if (currentSequenceItem.IsUseMetadataSelected)
                            {
                                parsedSequences.Insert(0, cachedData.Group);
                            }
                            else
                            {
                                parsedSequences.Add(cachedData.Group);
                            }

                            groupsData.Add(cachedData.Group, cachedData);
                        }
                    }
                    catch
                    {
                        // Set error status on the current item and re-throw the error
                        currentSequenceItem.SetErrorStatus(true);
                        throw;
                    }
                }

                // Extra parameters handed to the completion callback.
                Dictionary<string, object> parameters = new Dictionary<string, object>();
                parameters.Add(InputSelection.OVERLAP, selectionDialog.OverlappingBasePairs);
                parameters.Add(InputSelection.MINIMUMOVERLAP, selectionDialog.MinOverLap);

                // On successful completion of parsing...
                if (inputSequenceRangeSelectionComplete != null)
                {
                    InputSequenceRangeSelectionEventArg eventArg =
                            new InputSequenceRangeSelectionEventArg(groupsData,
                                    parsedSequences,
                                    parameters,
                                    argsForCallback);
                    inputSequenceRangeSelectionComplete(eventArg);
                }

                // NOTE(review): this removes OnInputSequenceDialogSubmit, not this method
                // (OnInputSequenceRangeDialogSubmit) — confirm which handler was subscribed.
                selectionDialog.InputSelectionDialogSubmitting -= OnInputSequenceDialogSubmit;
                selectionDialog.Close();

            }
            catch (Exception ex)
            {
                // Any failure (including the re-thrown per-item error) is reported to the
                // user, and the dialog is shown again.
                MessageBox.Show(ex.Message, Resources.CAPTION, MessageBoxButtons.OK, MessageBoxIcon.Error);
                selectionDialog.ShowDialog();
            }
        }
コード例 #13
0
        /// <summary>
        /// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
        /// 
        /// For instance if you had in group 'Chr1' the following ranges:
        /// 
        ///  Ranges in this instance   Ranges in the query 
        ///    0 to   10                 20 to   40
        ///   30 to   50                 70 to  100     
        ///   60 to   80                400 to  800
        ///  300 to  500                850 to  900
        ///  600 to  700                900 to 1200
        ///  800 to 1000                
        /// 
        /// Result for minOverlap set to 1
        ///     1. If outputType is OverlappingPiecesOfIntervals.
        ///         30 to 40
        ///         70 to 80
        ///         400 to 500
        ///         600 o 700
        ///         850 to 900
        ///         900 to 1000
        ///     2. If outputType is OverlappingIntervals
        ///          30 to   50
        ///          60 to   80
        ///         300 to  500
        ///         600 to  700
        ///         800 to 1000
        ///         
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minmum length of bases pairs should be overlapped.
        /// By default this will be set to 1.</param>
        /// <param name="outputType">
        /// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals. 
        /// By default this will be set to OverlappingPiecesOfIntervals that is only the base pairs that overlaps with 
        /// query ranges will be returned.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from 
        /// which the new sequence range is created are added to the ParentSeqRanges property of the 
        /// new sequence ranges.</param>
        /// <returns>The intersected result.</returns>
        public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // merge the query sequence ranges.
            IList<ISequenceRange> queryList = null;
            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // If the minOverlap is lessthan or equal to zero and overlapping intervals are required.
                            // then add the ref seq to result.
                            if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                result.Add(range);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                            {
                                // Add ref sequence only once for query ranges having same start and end.
                                if (previousOverlappingRange == null || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                                {
                                    range = new SequenceRange(
                                        refRange.ID,
                                        Math.Max(queryRange.Start, refRange.Start),
                                        Math.Min(queryRange.End, refRange.End));

                                    result.Add(range);
                                    CopyOfMetadata(range, refRange);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }
                            else
                            {
                                // Add ref sequence only once.
                                if (previousOverlappingRange == null)
                                {
                                    range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                    CopyOfMetadata(range, refRange);
                                    result.Add(range);
                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                    else
                                    {
                                        break;
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
            }

            return result;
        }
コード例 #14
0
ファイル: BEDOperationsTests.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Validates the parent seq ranges recorded on every range of a result grouping.
        /// When parents are not required, every result range must have an empty
        /// ParentSeqRanges collection. When they are required, the number of reference
        /// and query ranges (of the same group) found in ParentSeqRanges must add up to
        /// the size of ParentSeqRanges.
        /// </summary>
        /// <param name="resultSeqRange">Result seq range group.</param>
        /// <param name="refSeqRange">Reference seq range group; may be null.</param>
        /// <param name="querySeqRange">Query seq range group; may be null.</param>
        /// <param name="isParentSeqRangeRequired">Flag to indicate whether result should contain parent seq ranges or not.</param>
        /// <returns>Returns true if the parent seq ranges are valid; otherwise returns false.</returns>
        private static bool ValidateParentSeqRange(SequenceRangeGrouping resultSeqRange, SequenceRangeGrouping refSeqRange,
            SequenceRangeGrouping querySeqRange, bool isParentSeqRangeRequired)
        {
            foreach (string groupid in resultSeqRange.GroupIDs)
            {
                // A null grouping contributes no candidate parents. GetGroup may also hand
                // back null for an id the grouping does not contain, so that case is
                // treated as an empty group instead of failing inside LINQ.
                IList<ISequenceRange> refSeqRangeList = refSeqRange != null ? refSeqRange.GetGroup(groupid) : null;
                if (refSeqRangeList == null)
                {
                    refSeqRangeList = new List<ISequenceRange>();
                }

                IList<ISequenceRange> querySeqRangeList = querySeqRange != null ? querySeqRange.GetGroup(groupid) : null;
                if (querySeqRangeList == null)
                {
                    querySeqRangeList = new List<ISequenceRange>();
                }

                foreach (ISequenceRange resultRange in resultSeqRange.GetGroup(groupid))
                {
                    if (!isParentSeqRangeRequired)
                    {
                        // No parents were requested, so none may be present.
                        if (resultRange.ParentSeqRanges.Count != 0)
                        {
                            return false;
                        }
                    }
                    else
                    {
                        int refCount = refSeqRangeList.Count(R => resultRange.ParentSeqRanges.Contains(R));
                        int queryCount = querySeqRangeList.Count(R => resultRange.ParentSeqRanges.Contains(R));

                        // Every recorded parent has to be accounted for by a reference or query range.
                        if (refCount + queryCount != resultRange.ParentSeqRanges.Count)
                        {
                            return false;
                        }
                    }
                }
            }

            return true;
        }
コード例 #15
0
ファイル: Orphans.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Parses the given alignment file, collects the regions of all orphan paired
        /// reads and writes them to the console, followed by the regions obtained by
        /// merging overlapping orphan regions ("hot spots").
        /// </summary>
        /// <param name="filename">Path of the input file; parsed as BAM unless SAMInput is set, in which case it is parsed as SAM.</param>
        private void DisplayOrphans(string filename)
        {
            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
            }

            // Get paired reads from the sequence alignment map object.
            IList<PairedRead> pairedReads = alignmentMapobj.GetPairedReads(0, 0);

            // Get the orphan reads. Materialized once so the filter is not re-run for
            // the count and again for the projection below.
            List<PairedRead> orphans = pairedReads.Where(PR => PR.PairedType == PairedReadType.Orphan).ToList();
            int count = orphans.Count;
            if (count == 0)
            {
                // NOTE(review): there is no early return here, so the empty-result
                // messages further down are printed as well - confirm this is intended.
                Console.WriteLine("No Orphans to display");
            }

            var orphanRegions = new List<ISequenceRange>(count);
            orphanRegions.AddRange(orphans.Select(orphanRead => GetRegion(orphanRead.Read1)));

            // Get sequence range grouping object.
            SequenceRangeGrouping rangeGroup = new SequenceRangeGrouping(orphanRegions);

            if (!rangeGroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("Region of Orphan reads:");
                DisplaySequenceRange(rangeGroup);
            }

            // Overlapping orphan regions collapsed into single ranges.
            SequenceRangeGrouping mergedRegions = rangeGroup.MergeOverlaps();

            if (!mergedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo hot spots to display");
            }
            else
            {
                Console.Write("\r\nChromosomal hot spot:");
                DisplaySequenceRange(mergedRegions);
            }
        }
コード例 #16
0
ファイル: BedStats.cs プロジェクト: cpatmoore/bio
        //
        // Read a Bed file into memory.
        // Parses the ranges in the given file and wraps them in a SequenceRangeGrouping.
        // When verbose is set, the parsed ranges are also dumped to stderr.
        // When arguments.normalizeInputs is set, the returned grouping is the result of
        // merging overlapping ranges.
        //
        public static SequenceRangeGrouping ReadBedFile(string filename)
        {
            var parser = new BedParser();
            IList<ISequenceRange> listSequenceRange = parser.ParseRange(filename);
            if (verbose)
            {
                Console.Error.WriteLine("Processed File: {0}", filename);
                ListSequenceRangeToString(listSequenceRange);
            }

            var srg = new SequenceRangeGrouping(listSequenceRange);
            if (arguments.normalizeInputs)
            {
                // MergeOverlaps returns the merged grouping as a new object; the result
                // has to be kept, otherwise the normalization is silently lost.
                srg = srg.MergeOverlaps(); // could be called Normalize() or Cannonicalize()
            }
            return srg;
        }
コード例 #17
0
ファイル: Orphans.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Writes every sequence range of the given grouping to the console as a
        /// tab separated table (chromosome, start, end), one group after another.
        /// </summary>
        /// <param name="seqRangeGrop">Sequence range grouping to display.</param>
        private static void DisplaySequenceRange(SequenceRangeGrouping seqRangeGrop)
        {
            IEnumerable<string> ids = seqRangeGrop.GroupIDs;

            // Table header.
            Console.Write("\r\nChromosome\t\tStart\tEnd");

            foreach (string id in ids)
            {
                // One output line per range of the current group.
                foreach (ISequenceRange entry in seqRangeGrop.GetGroup(id))
                {
                    string chromosome = entry.ID.ToString();
                    string start = entry.Start.ToString();
                    string end = entry.End.ToString();

                    Console.Write("\n{0}\t\t\t{1}\t{2}", chromosome, start, end);
                }
            }
        }
コード例 #18
0
ファイル: LengthAnomaly.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Identify hot spot chromosomes for length anomaly regions.
        /// Parses the input file, writes the regions of length-anomaly reads and of
        /// orphan reads to the console, and finally writes the intersection of the two.
        /// </summary>
        /// <param name="filename">Path of the input file; parsed as BAM unless SAMInput is set, in which case it is parsed as SAM.</param>
        /// <param name="mean">Mean value. When left at -1, paired reads are requested without mean/deviation arguments.</param>
        /// <param name="deviation">Standard deviation. When left at -1, paired reads are requested without mean/deviation arguments.</param>
        private void IdentifyLentghAnamolies(string filename,
             float mean = -1, float deviation = -1)
        {
            // -1 is the "not supplied" default of both parameters; if either one is
            // missing, GetPairedReads() is called without arguments instead.
            bool calculateMeanNdeviation = mean == -1 || deviation == -1;

            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
            }

            // get reads from sequence alignment map object.
            IList<PairedRead> pairedReads = null;

            if (calculateMeanNdeviation)
            {
                pairedReads = alignmentMapobj.GetPairedReads();
            }
            else
            {
                pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
            }

            // Get the orphan reads. Materialized once so the filter is not re-evaluated
            // for the emptiness check, the capacity and the loop.
            List<PairedRead> orphans = pairedReads.Where(PR => PR.PairedType == PairedReadType.Orphan).ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping for Orphan regions.
            SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

            // Get the length anomaly reads (materialized once, same reason as above).
            List<PairedRead> lengthAnomalies = pairedReads.Where(PE => PE.PairedType == PairedReadType.LengthAnomaly).ToList();

            if (lengthAnomalies.Count == 0)
            {
                Console.WriteLine("No Anomalies to display");
            }

            List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
            foreach (PairedRead laRead in lengthAnomalies)
            {
                // The region starts at the position of the first read and extends
                // by the insert length of the pair.
                SequenceRange range = new SequenceRange();
                range.ID = laRead.Read1.RName;
                range.Start = laRead.Read1.Pos;
                range.End = laRead.Read1.Pos + laRead.InsertLength;
                lengthAnomalyRegions.Add(range);
            }

            // Get sequence range grouping for length anomaly regions.
            SequenceRangeGrouping lengthAnomalyRangegroup =
                                new SequenceRangeGrouping(lengthAnomalyRegions);
            if (!lengthAnomalyRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Length anomalies reads to display");
            }
            else
            {
                Console.Write("Region of length anomaly:");
                DisplaySequenceRange(lengthAnomalyRangegroup);
            }

            if (!orphanRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("\r\nRegion of Orphan reads:");
                DisplaySequenceRange(orphanRangegroup);
            }

            // Hot spots: length anomaly regions intersected with the orphan regions.
            SequenceRangeGrouping intersectedRegions =
                lengthAnomalyRangegroup.Intersect(orphanRangegroup);
            if (!intersectedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo Hot spots found");
            }
            else
            {
                Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
                DisplaySequenceRange(intersectedRegions);
            }
        }
コード例 #19
0
 /// <summary>
 /// Creates the metrics object for the given grouping by passing it straight to
 /// ComputeSequenceRangeGroupingMetrics.
 /// NOTE(review): presumably that call fills the metric members of this instance - confirm.
 /// </summary>
 /// <param name="srg">Sequence range grouping to compute the metrics for.</param>
 public SequenceRangeGroupingMetrics(SequenceRangeGrouping srg)
 {
     ComputeSequenceRangeGroupingMetrics(srg);
 }
コード例 #20
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// This method will dump the region overlap data to a sheet in the NodeXL workbook.
        /// Rows 1 to 4 receive summary labels and formulas, row 6 the column headers and
        /// the ranges themselves are written from row 7 onwards, group by group.
        /// Screen updating and events of the Excel application are switched off while
        /// writing and switched back on afterwards, even if writing fails.
        /// </summary>
        /// <param name="outputSheet">Sheet to which the data should be dumped to</param>
        /// <param name="srgOnly_A">Ranges reported in the "only A" column (column D)</param>
        /// <param name="srgOnly_B">Ranges reported in the "only B" column (column E)</param>
        /// <param name="srgOnly_AB">Ranges reported in the "only AB" column (column F)</param>
        /// <returns>Worksheet after writing data</returns>
        private static Worksheet WriteOverlapData(Worksheet outputSheet, SequenceRangeGrouping srgOnly_A, SequenceRangeGrouping srgOnly_B, SequenceRangeGrouping srgOnly_AB)
        {
            outputSheet.Application.ScreenUpdating = false;
            outputSheet.Application.EnableEvents = false;
            List<ISequenceRange> seqs_AList = null;
            List<ISequenceRange> seqs_BList = null;
            List<ISequenceRange> seqs_ABList = null;
            // First worksheet row that receives range data; the rows above hold summary and headers.
            int dataStartRow = 7;
            int currentRow;
            try
            {
                // write the header
                outputSheet.Range["A2"].Value2 = Resources.RegionsCountLabel;
                outputSheet.Range["A3"].Value2 = Resources.BasePairsCountLabel;
                outputSheet.Range["A4"].Value2 = Resources.PerOfBasePairsInRegionLabel;
                outputSheet.Range["D1"].Value2 = Resources.OnlyA;
                outputSheet.Range["E1"].Value2 = Resources.OnlyB;
                outputSheet.Range["F1"].Value2 = Resources.OnlyAB;
                outputSheet.Range["G1"].Value2 = Resources.Total;

                // Column headers of the data table, underlined and centered.
                outputSheet.Range["A6", "F6"].Value2 = BedHeadersForTwoInput;
                outputSheet.Range["A6", "F6"].Cells.Borders[XlBordersIndex.xlEdgeBottom].Weight = XlBorderWeight.xlThin;
                outputSheet.Range["A1", "F1"].Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
                outputSheet.Range["A6", "F6"].Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
                currentRow = dataStartRow;

                // Get all groupID's in all ranges
                List<string> groupIDs = new List<string>();
                groupIDs.AddRange(srgOnly_A.GroupIDs);
                groupIDs.AddRange(srgOnly_B.GroupIDs);
                groupIDs.AddRange(srgOnly_AB.GroupIDs);

                // Worksheet row at which the block of the current group starts.
                int prevRow = dataStartRow;

                // Display all overlap data
                foreach (string groupID in groupIDs.Distinct())
                {
                    seqs_AList = srgOnly_A.GetGroup(groupID);
                    seqs_BList = srgOnly_B.GetGroup(groupID);
                    seqs_ABList = srgOnly_AB.GetGroup(groupID);

                    // A grouping that lacks this group id is handled as an empty list.
                    if (seqs_AList == null) seqs_AList = new List<ISequenceRange>();
                    if (seqs_BList == null) seqs_BList = new List<ISequenceRange>();
                    if (seqs_ABList == null) seqs_ABList = new List<ISequenceRange>();

                    // Read cursors into the three lists.
                    int indexA = 0;
                    int indexB = 0;
                    int indexAB = 0;

                    // One row per range of this group; columns are ID, start, end and
                    // the length in the A, B or AB column.
                    object[,] output = new object[seqs_AList.Count + seqs_BList.Count + seqs_ABList.Count, 6];

                    while (indexA < seqs_AList.Count || indexB < seqs_BList.Count || indexAB < seqs_ABList.Count)
                    {
                        // Current candidate of each list, or null once that list is exhausted.
                        ISequenceRange a = indexA < seqs_AList.Count ? seqs_AList[indexA] : null;
                        ISequenceRange b = indexB < seqs_BList.Count ? seqs_BList[indexB] : null;
                        ISequenceRange ab = indexAB < seqs_ABList.Count ? seqs_ABList[indexAB] : null;

                        // Pick which of the three candidates is written next.
                        ISequenceRange range = GetSmallestSeqRangeToDisplay(a, b, ab);

                        if (range == null)
                        {
                            // Nothing was selected; advance every cursor so the loop still terminates.
                            indexA++;
                            indexB++;
                            indexAB++;

                            continue;
                        }

                        // currentRow - prevRow is the row offset inside this group's block.
                        output[currentRow - prevRow, 0] = range.ID;
                        output[currentRow - prevRow, 1] = range.Start;
                        output[currentRow - prevRow, 2] = range.End;

                        // The reference comparison tells which list the selected range came
                        // from; its length goes into the matching column and only that
                        // list's cursor is advanced.
                        if (range == a)
                        {
                            // length in the "only A" column
                            output[currentRow - prevRow, 3] = Math.Abs(range.End - range.Start);
                            indexA++;
                        }
                        else if (range == b)
                        {
                            // length in the "only B" column
                            output[currentRow - prevRow, 4] = Math.Abs(range.End - range.Start);
                            indexB++;
                        }
                        else
                        {
                            // length in the "only AB" column
                            output[currentRow - prevRow, 5] = Math.Abs(range.End - range.Start);
                            indexAB++;
                        }

                        currentRow++;
                    }

                    // Write the whole block of this group with a single range assignment.
                    outputSheet.Range["A" + prevRow.ToString(), "F" + (currentRow -1).ToString()].Value2 = output;
                    prevRow = currentRow;
                }

                // Row 2: per-column formulas over the data rows plus their total in G2.
                // NOTE(review): CountFormulaFormat / SumFormulaFormat are format strings
                // defined elsewhere; judging by their names they build COUNT and SUM formulas - confirm.
                string formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "D", dataStartRow, "D", currentRow - 1);
                outputSheet.Range["D2"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "E", dataStartRow, "E", currentRow - 1);
                outputSheet.Range["E2"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, CountFormulaFormat, "F", dataStartRow, "F", currentRow - 1);
                outputSheet.Range["F2"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 2, "F", 2);
                outputSheet.Range["G2"].Formula = formula;

                // Row 3: per-column formulas over the length values plus their total in G3.
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", dataStartRow, "D", currentRow - 1);
                outputSheet.Range["D3"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "E", dataStartRow, "E", currentRow - 1);
                outputSheet.Range["E3"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "F", dataStartRow, "F", currentRow - 1);
                outputSheet.Range["F3"].Formula = formula;
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 3, "F", 3);
                outputSheet.Range["G3"].Formula = formula;

                // Row 4: each column's row-3 value divided by the total in G3, shown as a percentage.
                outputSheet.Range["D4"].Formula = "=D3/G3";
                outputSheet.Range["E4"].Formula = "=E3/G3";
                outputSheet.Range["F4"].Formula = "=F3/G3";
                formula = string.Format(CultureInfo.InvariantCulture, SumFormulaFormat, "D", 4, "F", 4);
                outputSheet.Range["G4"].Formula = formula;
                outputSheet.Range["D4", "G4"].Cells.NumberFormat = "0.00%";

                outputSheet.UsedRange.Columns.AutoFit();
            }
            finally
            {
                // Always re-enable what was switched off at the top.
                outputSheet.Application.ScreenUpdating = true;
                outputSheet.Application.EnableEvents = true;
            }

            return outputSheet;
        }
コード例 #21
0
ファイル: BedStats.cs プロジェクト: cpatmoore/bio
        // Default printing of a SequenceRangeGrouping.
        // Writes a summary line (name plus the groups, ranges and bases values of the
        // grouping's metrics) to stderr, followed by one section per group id with
        // the number of ranges in the group and the ranges themselves.
        //
        public static void SequenceRangeGroupingToString(SequenceRangeGrouping srg, string name)
        {
            Console.Error.Write("[{0}] : SeqeuenceRangeGrouping: ", name);

            var metrics = new SequenceRangeGroupingMetrics(srg);
            Console.Error.WriteLine("{0}, {1}, {2}", metrics.groups, metrics.ranges, metrics.bases);

            foreach (string groupId in srg.GroupIDs)
            {
                // Look the group up once and reuse it for the count and the dump.
                var members = srg.GetGroup(groupId);

                Console.Error.WriteLine("--GroupID: {0}, {1}", groupId, members.Count());
                ListSequenceRangeToString(members);
            }

            Console.Error.WriteLine();
        }
コード例 #22
0
ファイル: VennToNodeXL.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Adds a worksheet after the current last sheet of the workbook, names it and
        /// fills it with the given sequence ranges: a header in A1:C1 followed by the
        /// ranges of every group, written one group below the other starting at row 2.
        /// Screen updating and events of the Excel application are switched off while
        /// the data is written and switched back on afterwards, even if writing fails.
        /// </summary>
        /// <param name="sourceSequenceRanges">Source sequence ranges</param>
        /// <param name="sheetName">Name to be given for the new sheet</param>
        /// <param name="targetWorkbook">Workbook in which the sheet has to be added</param>
        /// <returns>Newly created sheet with source data.</returns>
        private static Worksheet DisplaySourceData(SequenceRangeGrouping sourceSequenceRanges, string sheetName, Workbook targetWorkbook)
        {
            // Append one worksheet behind the last existing sheet.
            Worksheet sheet = targetWorkbook.Sheets.Add(Type.Missing, targetWorkbook.Sheets[targetWorkbook.Sheets.Count], 1, XlSheetType.xlWorksheet);
            sheet.Name = sheetName;

            targetWorkbook.Application.ScreenUpdating = false;
            targetWorkbook.Application.EnableEvents = false;

            try
            {
                // Header row.
                sheet.Range["A1", "C1"].Value2 = BedHeaders;

                // Data rows: each group is written right below the previous one.
                int nextRow = 2;
                foreach (string id in sourceSequenceRanges.GroupIDs)
                {
                    Range written = WriteSequenceRangeAt(sheet.Cells[nextRow, 1], sourceSequenceRanges.GetGroup(id), false);
                    nextRow = nextRow + written.Rows.Count;
                }

                sheet.UsedRange.Columns.AutoFit();
            }
            finally
            {
                targetWorkbook.Application.ScreenUpdating = true;
                targetWorkbook.Application.EnableEvents = true;
            }

            return sheet;
        }
コード例 #23
0
        /// <summary>
        /// Returns overlapping sequence ranges from this and specified SequenceRangeGroup for each group in this grouping.
        ///
        /// For instance if you had in group 'Chr1' the following ranges:
        ///
        ///  Ranges in this instance   Ranges in the query
        ///    0 to   10                 20 to   40
        ///   30 to   50                 70 to  100
        ///   60 to   80                400 to  800
        ///  300 to  500                850 to  900
        ///  600 to  700                900 to 1200
        ///  800 to 1000
        ///
        /// Result for minOverlap set to 1
        ///     1. If outputType is OverlappingPiecesOfIntervals.
        ///         30 to 40
        ///         70 to 80
        ///         400 to 500
        ///         600 to 700
        ///         850 to 900
        ///         900 to 1000
        ///     2. If outputType is OverlappingIntervals
        ///          30 to   50
        ///          60 to   80
        ///         300 to  500
        ///         600 to  700
        ///         800 to 1000
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// The query ranges are merged (MergeOverlaps with a minimum overlap of 0)
        /// before they are compared against the ranges of this instance.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum length of bases pairs should be overlapped.
        /// By default this will be set to 1.</param>
        /// <param name="outputType">
        /// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
        /// By default this will be set to OverlappingPiecesOfIntervals that is only the base pairs that overlaps with
        /// query ranges will be returned.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence ranges.</param>
        /// <returns>The intersected result.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // Keep the caller's original query ranges: after the merge below they are
            // needed to decide whether a query range is itself an original range (and
            // becomes a parent directly) or a merged one (whose parents are used).
            IList<ISequenceRange> queryList = null;

            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // merge the query sequence ranges.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                // Without query ranges in this group nothing can intersect.
                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // If the minOverlap is less than or equal to zero and overlapping intervals are required,
                            // then add the ref seq to result.
                            if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                result.Add(range);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            if (outputType == IntersectOutputType.OverlappingPiecesOfIntervals)
                            {
                                // Add ref sequence only once for query ranges having same start and end.
                                // A query range counts as a repeat only when BOTH coordinates match the
                                // previous one, so a new piece is needed as soon as either one differs.
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start || previousOverlappingRange.End != queryRange.End)
                                {
                                    // The overlapping piece is the part shared by both ranges.
                                    range = new SequenceRange(
                                        refRange.ID,
                                        Math.Max(queryRange.Start, refRange.Start),
                                        Math.Min(queryRange.End, refRange.End));

                                    result.Add(range);
                                    CopyOfMetadata(range, refRange);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }
                            else
                            {
                                // Add ref sequence only once.
                                if (previousOverlappingRange == null)
                                {
                                    range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                    CopyOfMetadata(range, refRange);
                                    result.Add(range);
                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                    else
                                    {
                                        // No parents to collect, so the remaining query ranges are irrelevant.
                                        break;
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
            }

            return result;
        }
コード例 #24
0
ファイル: BioRibbon.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Formats and writes the query region (Output of Merge/Subtract/Intersect) operations
        /// into a newly added worksheet. When the result group has no group IDs,
        /// nothing is written and an informational message box is shown instead.
        /// </summary>
        /// <param name="resultWorkbook">
        /// Workbook to which Range has to be written
        /// </param>
        /// <param name="resultSheetname">New worksheet name</param>
        /// <param name="resultGroup">Output group</param>
        /// <param name="groupsData">
        /// Complete input groups information
        /// Contains individual Group, sheet and addresses of ISequenceRange
        /// </param>
        /// <param name="showMetadata">
        /// When true, the values of the range headers from index 3 onwards are written
        /// after the ID, Start and End columns of every result row
        /// </param>
        /// <param name="showBasePairCount">
        /// When true, summed range lengths (End - Start) are written next to the
        /// range counts, and the last column receives the reference/query common length
        /// </param>
        private void WriteSequenceRange(
            Workbook resultWorkbook,
            string resultSheetname,
            SequenceRangeGrouping resultGroup,
            Dictionary<SequenceRangeGrouping, GroupData> groupsData,
            bool showMetadata,
            bool showBasePairCount)
        {
            // Any() stops at the first ID instead of counting the whole sequence.
            if (resultGroup.GroupIDs.Any())
            {
                int baseRowIndex = 2, baseColumnIndex = 2;
                int dataRowIndex = 0, dataColumnIndex = 0;
                int totalColumnCount = 0;
                object[,] values;
                List<string> hyperlinks = null;
                List<ISequenceRange> resultSequenceRanges = null;
                Dictionary<SequenceRangeGrouping, Dictionary<string, int>> groupSheetIndices = null;
                Dictionary<int, Tuple<SequenceRangeGrouping, bool, List<ISequenceRange>>> columnData = null;
                Dictionary<ISequenceRange, string> rangedata = null;
                Dictionary<SequenceRangeGrouping, SequenceRangeGrouping> allSheetData = null;
                Dictionary<SequenceRangeGrouping, int> allSheetCount = null;
                SequenceRangeGrouping groupToMerge = null;
                SequenceRangeGrouping referenceGroup = null;
                SequenceRangeGrouping queryGroup = null;
                SequenceRangeGrouping sheetGroup = null;
                int sheetCount = 0;
                Range activeRange = null;

                // The last existing sheet is passed as the second argument of Worksheets.Add.
                var resultWorksheet =
                    resultWorkbook.Worksheets.Add(
                        Type.Missing,
                        resultWorkbook.Worksheets.get_Item(resultWorkbook.Worksheets.Count),
                        Type.Missing,
                        Type.Missing) as Worksheet;
                ((_Worksheet)resultWorksheet).Activate();
                Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;

                resultWorksheet.Name = resultSheetname;
                activeRange = resultWorksheet.get_Range(GetColumnString(baseColumnIndex) + baseRowIndex, Type.Missing);

                // Flatten the metadata of every input group into a single range -> text lookup.
                // The text is later split on ',' to build the hyperlink list.
                rangedata = groupsData.Values
                    .Select(gd => gd.Metadata)
                    .SelectMany(sd => sd.Values)
                    .SelectMany(rd => rd)
                    .ToDictionary(k => k.Key, v => v.Value);

                groupSheetIndices = new Dictionary<SequenceRangeGrouping, Dictionary<string, int>>();
                baseRowIndex = this.WriteSequenceRangeHeader(
                    resultWorksheet,
                    groupSheetIndices,
                    groupsData,
                    baseRowIndex,
                    baseColumnIndex,
                    ref totalColumnCount,
                    showMetadata,
                    showBasePairCount);
                totalColumnCount -= (baseColumnIndex - 1);

                foreach (string resultGroupKey in resultGroup.GroupIDs)
                {
                    // One block of rows per group ID; values are buffered in an array
                    // and pushed to the sheet with a single set_Value call.
                    resultSequenceRanges = resultGroup.GetGroup(resultGroupKey);
                    dataRowIndex = 0;
                    values = new object[resultSequenceRanges.Count, totalColumnCount];
                    activeRange = resultWorksheet.get_Range(
                        GetColumnString(baseColumnIndex) + baseRowIndex,
                        Missing.Value);
                    activeRange = activeRange.get_Resize(resultSequenceRanges.Count, totalColumnCount);

                    foreach (ISequenceRange resultSequenceRange in resultSequenceRanges)
                    {
                        referenceGroup = null;
                        queryGroup = null;
                        dataColumnIndex = 0;
                        allSheetData = new Dictionary<SequenceRangeGrouping, SequenceRangeGrouping>();
                        allSheetCount = new Dictionary<SequenceRangeGrouping, int>();

                        values[dataRowIndex, dataColumnIndex] = resultSequenceRange.ID;
                        dataColumnIndex++;
                        values[dataRowIndex, dataColumnIndex] = resultSequenceRange.Start;
                        dataColumnIndex++;
                        values[dataRowIndex, dataColumnIndex] = resultSequenceRange.End;
                        dataColumnIndex++;

                        if (showMetadata)
                        {
                            // The first three headers are skipped because ID, Start and End
                            // were written just above.
                            for (int index = 3; index < rangeHeaders.Count; index++)
                            {
                                values[dataRowIndex, dataColumnIndex] = ExtractRangeMetadata(
                                    resultSequenceRange,
                                    rangeHeaders[index]);
                                dataColumnIndex++;
                            }
                        }

                        columnData = PrepareSequenceRowRange(
                            groupsData,
                            groupSheetIndices,
                            rangedata,
                            resultSequenceRange);

                        foreach (var columnGroup in columnData)
                        {
                            if (showBasePairCount)
                            {
                                // Get the parent ranges for Group's range in a column
                                groupToMerge = new SequenceRangeGrouping(columnGroup.Value.Item3);
                                if (1 < columnGroup.Value.Item3.Count)
                                {
                                    groupToMerge = groupToMerge.MergeOverlaps(0, false);
                                }

                                // Render data for Group's range in a column
                                values[dataRowIndex, columnGroup.Key] =
                                    groupToMerge.GroupRanges.Sum(sr => sr.End - sr.Start);
                                values[dataRowIndex, columnGroup.Key + 1] = columnGroup.Value.Item3.Count;
                            }
                            else
                            {
                                values[dataRowIndex, columnGroup.Key] = columnGroup.Value.Item3.Count;
                            }

                            // Collect the hyperlink targets of every range shown in this cell
                            hyperlinks = new List<string>();
                            foreach (ISequenceRange range in columnGroup.Value.Item3)
                            {
                                hyperlinks.AddRange(rangedata[range].Split(','));
                            }

                            this.ShowHyperlink(
                                hyperlinks,
                                activeRange,
                                columnGroup.Key,
                                dataRowIndex,
                                showBasePairCount);

                            if (showBasePairCount)
                            {
                                // Calculate data for all group
                                if (allSheetData.TryGetValue(columnGroup.Value.Item1, out sheetGroup))
                                {
                                    allSheetData[columnGroup.Value.Item1] = sheetGroup.MergeOverlaps(
                                        groupToMerge,
                                        0,
                                        false);
                                }
                                else
                                {
                                    allSheetData[columnGroup.Value.Item1] = groupToMerge;
                                }

                                // Build up reference & query groups (later get common range using this)
                                if (columnGroup.Value.Item2)
                                {
                                    if (null == referenceGroup)
                                    {
                                        referenceGroup = groupToMerge;
                                    }
                                    else
                                    {
                                        referenceGroup = referenceGroup.MergeOverlaps(groupToMerge, 0, false);
                                    }
                                }
                                else
                                {
                                    if (null == queryGroup)
                                    {
                                        queryGroup = groupToMerge;
                                    }
                                    else
                                    {
                                        queryGroup = queryGroup.MergeOverlaps(groupToMerge, 0, false);
                                    }
                                }
                            }

                            // Calculate range count for all group.
                            // TryGetValue leaves sheetCount at 0 when the key is new, so one
                            // assignment covers both the first and the subsequent columns.
                            allSheetCount.TryGetValue(columnGroup.Value.Item1, out sheetCount);
                            allSheetCount[columnGroup.Value.Item1] = sheetCount + columnGroup.Value.Item3.Count;
                        }

                        // Render all columns in SequenceRangeGrouping
                        foreach (var allData in allSheetCount)
                        {
                            dataColumnIndex = groupSheetIndices[allData.Key].Values.Min() - (showBasePairCount ? 2 : 1);
                            if (showBasePairCount)
                            {
                                values[dataRowIndex, dataColumnIndex] =
                                    allSheetData[allData.Key].GroupRanges.Sum(sr => sr.End - sr.Start);
                                dataColumnIndex++;
                            }
                            values[dataRowIndex, dataColumnIndex] = allData.Value;
                        }

                        if (showBasePairCount)
                        {
                            // Render common column in SequenceRangeGrouping
                            if (null != referenceGroup && null != queryGroup)
                            {
                                referenceGroup = referenceGroup.Intersect(
                                    queryGroup,
                                    0,
                                    IntersectOutputType.OverlappingPiecesOfIntervals,
                                    false);
                                values[dataRowIndex, totalColumnCount - 1] =
                                    referenceGroup.GroupRanges.Sum(sr => sr.End - sr.Start);
                            }
                            else
                            {
                                values[dataRowIndex, totalColumnCount - 1] = 0;
                            }
                        }

                        dataRowIndex++;
                    }

                    activeRange.set_Value(Missing.Value, values);

                    // The next group starts right below the rows just written.
                    baseRowIndex += dataRowIndex;
                }

                resultWorksheet.Columns.AutoFit();
                this.NormalizeColumWidths(resultWorksheet.UsedRange);
                this.EnableAllControls();
            }
            else
            {
                MessageBox.Show(
                    Resources.NO_RESULT,
                    Resources.CAPTION,
                    MessageBoxButtons.OK,
                    MessageBoxIcon.Information);
            }
        }
コード例 #25
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        ///
        /// For example,
        ///
        ///  Ranges in this instance   Ranges in the query
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
        /// By default this will be set to NonOverlappingPiecesOfIntervals that is non overlapping
        /// pieces of intervals along with non overlapping ranges from this instance
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
                                              long minOverlap = 1,
                                              SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals,
                                              bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // Keep the caller's original query ranges before merging. They are used below to
            // decide whether a merged range is itself an original range (added as parent
            // directly) or a derived one (its ParentSeqRanges are added instead).
            IList<ISequenceRange> queryList = null;

            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // merge the query sequence ranges.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                            querySeqRanges,
                                                                                            minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // Nothing in the query touches this reference range: copy it through.
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervals needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping
                            // ranges from reference sequence range.
                            if (refRange.Start < queryRange.Start)
                            {
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // A right-hand remainder from an earlier query range exists:
                                    // trim it so that it ends where this query range starts.
                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    // NOTE(review): the test below requires BOTH start and end to differ,
                                    // which is stricter than the sentence above - confirm whether || was intended.
                                    if (previousOverlappingRange == null
                                        || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                                    {
                                        // Iterate backwards so RemoveAt does not shift unvisited items.
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    // Left-hand piece: from the reference start up to the query start.
                                    if (previousOverlappingRange == null
                                        || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                                    {
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            if (queryRange.End < refRange.End)
                            {
                                // Right-hand piece: from the query end up to the reference end. It is
                                // remembered in previousSeqRanges so a later query range can trim it.
                                if (previousOverlappingRange == null
                                    || (previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End))
                                {
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    // The query has no ranges for this ID, so every reference range is copied as is.
                    // Iterate as ISequenceRange (like the loop above): a SequenceRange loop variable
                    // would insert a hidden cast and throw InvalidCastException for other implementations.
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return result;
        }
コード例 #26
0
        /// <summary>
        /// Combines the ranges of this grouping with the ranges of the query grouping
        /// and merges the overlapping ones.
        ///
        /// For example,
        ///
        ///  Ranges in this instance   Ranges in the query
        ///    3 to  15                   4 to 10
        ///    5 to  18                  11 to 20
        ///
        ///  Result for minOverlap set to 1
        ///   3 to 20
        ///
        /// The flattened ranges of both groupings are pooled into a temporary
        /// SequenceRangeGrouping, and the result of its MergeOverlaps call is returned.
        /// </summary>
        /// <param name="query">Query sequence ranges.</param>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The merged sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(SequenceRangeGrouping query, long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameQuery);
            }

            // Pool this instance's ranges first, then the query's, into one flat list.
            List<ISequenceRange> pooledRanges = new List<ISequenceRange>();
            pooledRanges.AddRange(this.Flatten());
            pooledRanges.AddRange(query.Flatten());

            // The single-grouping overload does the actual merging.
            SequenceRangeGrouping pooledGroup = new SequenceRangeGrouping(pooledRanges);
            SequenceRangeGrouping merged = pooledGroup.MergeOverlaps(minOverlap, isParentSeqRangesRequired);
            return merged;
        }
コード例 #27
0
 /// <summary>
 /// Merges two sequence ranges with default parameters: only the query grouping
 /// is passed to MergeOverlaps, so its optional arguments keep their defaults.
 /// </summary>
 /// <param name="referenceSequence">Reference sequence for merging.</param>
 /// <param name="querySequence">Query sequence for merging; passed to MergeOverlaps unchanged.</param>
 /// <returns>SequenceRangeGrouping with merged output.</returns>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="referenceSequence"/> is null.
 /// </exception>
 public static SequenceRangeGrouping DoBEDMerge(SequenceRangeGrouping referenceSequence, SequenceRangeGrouping querySequence)
 {
     // Fail with a named argument instead of a bare NullReferenceException on the call below.
     if (referenceSequence == null)
     {
         throw new System.ArgumentNullException("referenceSequence");
     }

     return referenceSequence.MergeOverlaps(querySequence);
 }
コード例 #28
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        /// 
        /// For example,
        /// 
        ///  Ranges in this instance   Ranges in the query 
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///    
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///    
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///   
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minmum length of overlap. By default this will be set to 1</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals. 
        /// By default this will be set to NonOverlappingPiecesOfIntervals that is non overlapping 
        /// pieces of intervels along with non overlapping ranges from this instance 
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from 
        /// which the new sequence range is created are added to the ParentSeqRanges property of the 
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
            long minOverlap = 1,
            SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals,
            bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // merge the query sequence ranges.
            IList<ISequenceRange> queryList = null;
            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                            querySeqRanges,
                                                                                            minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervels needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping 
                            // ranges from reference sequence range.
                            if (refRange.Start < queryRange.Start)
                            {
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            if (queryRange.End < refRange.End)
                            {
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                {
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    foreach (SequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return result;
        }
コード例 #29
0
ファイル: BedStats.cs プロジェクト: cpatmoore/bio
 //
 // print 
 /// <summary>
 /// Builds a <c>SequenceRangeGroupingMetrics</c> for the supplied grouping
 /// and returns its <c>bases</c> value.
 /// </summary>
 /// <param name="srg">Grouping handed to the metrics constructor.</param>
 /// <returns>The <c>bases</c> member of the freshly constructed metrics object.</returns>
 public static long SequenceRangeGroupingCBases(SequenceRangeGrouping srg)
 {
     // The metrics object is only needed for this single read, so it is not kept in a local.
     return new SequenceRangeGroupingMetrics(srg).bases;
 }