예제 #1
0
        /// <summary>
        /// Compares this sequence range with another object.
        /// </summary>
        /// <param name="obj">SequenceRange instance to compare.</param>
        /// <returns>
        /// 0 when <paramref name="obj"/> is null or is not a SequenceRange.
        /// Otherwise the result of the strongly typed CompareTo overload, which
        /// is defined outside this method; according to its documentation it
        /// compares the Start values and, when those are identical, the End
        /// values.
        /// </returns>
        public int CompareTo(object obj)
        {
            // "as" produces null both for a null argument and for an argument
            // of an unrelated type, so a single check on the converted
            // reference covers both cases. (Previously the second check
            // re-tested obj, so an argument of the wrong type was passed on
            // to the typed overload as null.)
            SequenceRange sequenceRange = obj as SequenceRange;

            // ReferenceEquals keeps this a pure reference test even if the
            // type defines its own equality operators.
            if (object.ReferenceEquals(sequenceRange, null))
            {
                return 0;
            }

            return CompareTo(sequenceRange);
        }
예제 #2
0
        /// <summary>
        /// For each group in the grouping, this method traverses through each range
        /// in the group and normalizes the ranges down to the minimal spanning set
        /// required to still show the same range spans.
        ///
        /// For instance if you had in group 'Chr1' the following ranges:
        ///
        /// -> 10 to 100
        /// -> 200 to 250
        /// -> 35 to 45
        /// -> 90 to 150
        ///
        /// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
        /// For minOverlap = 0
        ///
        /// -> 10 to 150
        /// -> 200 to 250
        ///
        /// for minOverlap = -50
        ///
        /// -> 10 to 250
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="minOverlap">Minimum number of base pairs by which the merged range so far
        /// must overlap the next range (merged End minus next Start) for the two to be merged.
        /// A negative value also merges ranges separated by a gap of up to that size.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The overlapped sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();
            List<ISequenceRange> sortedRanges = new List<ISequenceRange>();

            foreach (List<ISequenceRange> rangeList in this.groups.Values)
            {
                // Work on a sorted copy so the group's own list is never reordered.
                sortedRanges.Clear();
                sortedRanges.AddRange(rangeList);
                sortedRanges.Sort();

                // Walk the sorted list with a cursor. Removing the head element
                // on every step would shift the whole list each time and make
                // the merge quadratic in the number of ranges.
                int next = 0;
                while (next < sortedRanges.Count)
                {
                    // Start a new merged range from the first unconsumed range.
                    ISequenceRange first = sortedRanges[next++];
                    ISequenceRange seqRange = new SequenceRange(first.ID, first.Start, first.End);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(seqRange, first);
                    }

                    seqRangeGroup.Add(seqRange);

                    // Absorb every following range that overlaps the merged
                    // range by at least minOverlap, extending End as needed.
                    while (next < sortedRanges.Count && (seqRange.End - sortedRanges[next].Start) >= minOverlap)
                    {
                        ISequenceRange absorbed = sortedRanges[next++];
                        seqRange.End = Math.Max(seqRange.End, absorbed.End);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(seqRange, absorbed);
                        }
                    }
                }
            }

            return seqRangeGroup;
        }
예제 #3
0
        /// <summary>
        /// Converts the rows of a worksheet range holding BED data into SequenceRange objects.
        /// Hidden rows are skipped, and a row is only returned when its ID, start and end
        /// columns are all mapped and non-empty. Every other mapped, non-empty cell is stored
        /// in the metadata of the row's sequence range under its mapped header.
        /// </summary>
        /// <param name="userSelectedRange">Range of cells</param>
        /// <param name="columnMapping">A dictionary which has a column number to BED header mapping.</param>
        /// <returns>
        /// A dictionary from each created sequence range to the address string produced by
        /// GetRangeAddress for its row. Empty when the range's Value2 is not a two-dimensional
        /// array (for example a single cell) or when <paramref name="columnMapping"/> is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="userSelectedRange"/> or <paramref name="columnMapping"/> is null.
        /// </exception>
        /// <exception cref="FormatException">A start or end cell does not hold an integer value.</exception>
        public static Dictionary<ISequenceRange, string> RangeToSequenceRange(Range userSelectedRange, Dictionary<int, string> columnMapping)
        {
            if (userSelectedRange == null)
            {
                throw new ArgumentNullException("userSelectedRange");
            }

            if (columnMapping == null)
            {
                throw new ArgumentNullException("columnMapping");
            }

            Dictionary<ISequenceRange, string> sequenceRanges = new Dictionary<ISequenceRange, string>();

            // Value2 only yields an object[,] (base 1) for a multi-cell range; for a single
            // cell the cast gives null. Neither that case nor an empty mapping can produce
            // a complete ID/start/end triple, so there is nothing to convert.
            object[,] cellRange = userSelectedRange.Value2 as object[,];
            if (cellRange == null || columnMapping.Count == 0)
            {
                return sequenceRanges;
            }

            int rowCount = cellRange.GetLength(0);
            int columnCount = cellRange.GetLength(1);
            int rangeWidth = columnMapping.Keys.Max() - columnMapping.Keys.Min() + 1;
            SortedSet<int> sortedColumn = new SortedSet<int>(columnMapping.Keys);

            // The worksheet column number behind each range column is the same for every
            // row, so resolve it once here instead of once per cell.
            int[] sheetColumns = new int[columnCount + 1];
            for (int col = 1; col <= columnCount; col++)
            {
                sheetColumns[col] = (userSelectedRange[1, col] as Range).Column;
            }

            for (int row = 1; row <= rowCount; row++)
            {
                // If hidden row, then skip
                if ((userSelectedRange.Rows[row] as Range).EntireRow.Hidden)
                {
                    continue;
                }

                bool idFound = false;
                bool startFound = false;
                bool endFound = false;
                SequenceRange sequenceRange = new SequenceRange();

                for (int col = 1; col <= columnCount; col++)
                {
                    object cell = cellRange[row, col];
                    if (cell == null)
                    {
                        continue;
                    }

                    string columnName;
                    if (!columnMapping.TryGetValue(sheetColumns[col], out columnName))
                    {
                        // Column is not part of the mapping; ignore its content.
                        continue;
                    }

                    string cellValue = cell.ToString();
                    switch (columnName) // mapped header for current column
                    {
                        case CHROM_ID:
                            sequenceRange.ID = cellValue;
                            idFound = true;
                            break;

                        case CHROM_START:
                            sequenceRange.Start = long.Parse(cellValue, CultureInfo.InvariantCulture);
                            startFound = true;
                            break;

                        case CHROM_END:
                            sequenceRange.End = long.Parse(cellValue, CultureInfo.InvariantCulture);
                            endFound = true;
                            break;

                        default:
                            // any other item goes into the metadata dictionary
                            sequenceRange.Metadata.Add(columnName, cellValue);
                            break;
                    }
                }

                // Only rows that supplied all three mandatory BED fields are returned.
                if (idFound && startFound && endFound)
                {
                    string rangeAddress = GetRangeAddress(userSelectedRange, sortedColumn, rangeWidth, row);
                    sequenceRanges.Add(sequenceRange, rangeAddress);
                }
            }

            return sequenceRanges;
        }
예제 #4
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        /// 
        /// For example,
        /// 
        ///  Ranges in this instance   Ranges in the query 
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///    
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///    
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///   
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// The query grouping is merged with MergeOverlaps(0, ...) before it is compared
        /// with the ranges of this instance.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals. 
        /// By default this will be set to NonOverlappingPiecesOfIntervals, that is, the non overlapping 
        /// pieces of intervals along with the non overlapping ranges from this instance 
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from 
        /// which the new sequence range is created are added to the ParentSeqRanges property of the 
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
            long minOverlap = 1,
            SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals,
            bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

            // Right-hand remainder pieces created so far for the reference range currently
            // being processed; later query ranges may still shorten them.
            List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

            // Most recently created output range. It is deliberately declared outside the
            // loops: the parent-linking code below may attach parents to the range created
            // in an earlier iteration.
            SequenceRange range = null;

            // Remember the caller's original query ranges before merging, so that parents
            // can later be recorded either as the query range itself or as its ParentSeqRanges.
            IList<ISequenceRange> queryList = null;
            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // merge the query sequence ranges.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();

                        // GetOverlappingRenges is defined elsewhere; presumably it returns the
                        // query ranges that overlap refRange by at least minOverlap - TODO confirm.
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                            querySeqRanges,
                                                                                            minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // Nothing overlaps this reference range: emit a copy of it unchanged,
                            // unless minOverlap <= 0 and only IntervalsWithNoOverlap was requested.
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervals needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping 
                            // ranges from reference sequence range.

                            // Part of the reference range lying to the left of this query range.
                            if (refRange.Start < queryRange.Start)
                            {
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // A remainder piece from an earlier query range already covers the
                                    // area left of this query range, so trim it instead of adding a new piece.

                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    // NOTE(review): && binds tighter than ||, so this reads
                                    // "null || (Start differs && End differs)". The comment above speaks of
                                    // start AND end being equal, which would be "Start differs || End differs";
                                    // confirm which one is intended.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        // Iterate backwards so RemoveAt does not disturb the indices still to visit.
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                // Piece reaches into the query range: cut it off at the query start.
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                // Piece ends before the query range: stop tracking it
                                                // (it stays in result, it is only removed from this list).
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        // NOTE(review): previousSeqRanges[0] is read after the trimming loop,
                                        // which may have removed entries - confirm the list cannot be empty here.
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    // No earlier remainder piece applies: create the left-hand piece
                                    // from the reference start up to the query start.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    // Link the query range (or, when it is a merged range that was not in
                                    // the original query list, its parents) to the current output range.
                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            // Part of the reference range lying to the right of this query range.
                            if (queryRange.End < refRange.End)
                            {
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                {
                                    // Provisional piece from the query end to the reference end; it is
                                    // tracked in previousSeqRanges so a later query range can shorten it.
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    // The query has no ranges for this group id: every reference range is copied as is.
                    // NOTE(review): the loop variable is typed SequenceRange, so each ISequenceRange
                    // element is cast; any other ISequenceRange implementation would throw
                    // InvalidCastException here, unlike the ISequenceRange loop above.
                    foreach (SequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return result;
        }
예제 #5
0
        /// <summary>
        /// Intersects the ranges of this grouping with the ranges of the specified query grouping,
        /// group by group. The query grouping is merged with MergeOverlaps(0, ...) first.
        /// 
        /// For instance if you had in group 'Chr1' the following ranges:
        /// 
        ///  Ranges in this instance   Ranges in the query 
        ///    0 to   10                 20 to   40
        ///   30 to   50                 70 to  100     
        ///   60 to   80                400 to  800
        ///  300 to  500                850 to  900
        ///  600 to  700                900 to 1200
        ///  800 to 1000                
        /// 
        /// Result for minOverlap set to 1
        ///     1. If outputType is OverlappingPiecesOfIntervals.
        ///         30 to 40
        ///         70 to 80
        ///         400 to 500
        ///         600 to 700
        ///         850 to 900
        ///         900 to 1000
        ///     2. If outputType is OverlappingIntervals
        ///          30 to   50
        ///          60 to   80
        ///         300 to  500
        ///         600 to  700
        ///         800 to 1000
        ///         
        /// All ranges placed in the returned SequenceRangeGrouping are newly created objects.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap.
        /// By default this will be set to 1.</param>
        /// <param name="outputType">
        /// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals. 
        /// The default, OverlappingPiecesOfIntervals, returns only the base pairs that overlap 
        /// with the query ranges.</param>
        /// <param name="isParentSeqRangesRequired">When true, the sequence ranges from which each 
        /// new sequence range is created are added to the ParentSeqRanges property of that 
        /// new sequence range.</param>
        /// <returns>The intersected result.</returns>
        public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;
            SequenceRangeGrouping intersection = new SequenceRangeGrouping();

            // Kept outside the loops on purpose: parents may be attached to the range
            // that was created in an earlier iteration.
            SequenceRange current = null;

            // Snapshot of the caller's query ranges, taken before they are merged.
            IList<ISequenceRange> originalQueryRanges = isParentSeqRangesRequired ? query.Flatten() : null;
            SequenceRangeGrouping mergedQuery = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                // Groups without any query range contribute nothing to the result.
                if (!mergedQuery.groups.ContainsKey(id))
                {
                    continue;
                }

                List<ISequenceRange> queryRanges = new List<ISequenceRange>(mergedQuery.groups[id]);
                if (queryRanges.Count == 0)
                {
                    continue;
                }

                queryRanges.Sort();
                List<ISequenceRange> referenceRanges = new List<ISequenceRange>(groups[id]);

                foreach (ISequenceRange refRange in referenceRanges)
                {
                    IList<ISequenceRange> overlaps = GetOverlappingRenges(refRange, queryRanges, minOverlap);

                    if (overlaps == null || overlaps.Count == 0)
                    {
                        // With a non-positive minOverlap and whole intervals requested,
                        // the reference range is reported even though nothing overlaps it.
                        if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                        {
                            current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(current, refRange);
                            intersection.Add(current);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(current, refRange);
                            }
                        }

                        continue;
                    }

                    ISequenceRange lastQueryRange = null;
                    foreach (ISequenceRange queryRange in overlaps)
                    {
                        if (piecesRequested)
                        {
                            // Skip creating a new piece when the previous query range shares
                            // its start or its end with this one.
                            bool sharesBoundary = lastQueryRange != null
                                && (lastQueryRange.Start == queryRange.Start || lastQueryRange.End == queryRange.End);

                            if (!sharesBoundary)
                            {
                                long pieceStart = Math.Max(queryRange.Start, refRange.Start);
                                long pieceEnd = Math.Min(queryRange.End, refRange.End);
                                current = new SequenceRange(refRange.ID, pieceStart, pieceEnd);

                                intersection.Add(current);
                                CopyOfMetadata(current, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(current, refRange);
                                }
                            }
                        }
                        else if (lastQueryRange == null)
                        {
                            // Whole intervals: the reference range is emitted once, on the first overlap.
                            current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(current, refRange);
                            intersection.Add(current);

                            if (!isParentSeqRangesRequired)
                            {
                                // Without parent tracking the remaining overlaps add nothing.
                                break;
                            }

                            AddParent(current, refRange);
                        }

                        if (isParentSeqRangesRequired)
                        {
                            // An original query range becomes a parent itself; a merged range
                            // hands over the parents it collected while being merged.
                            if (originalQueryRanges.Contains(queryRange))
                            {
                                AddParent(current, queryRange);
                            }
                            else if (queryRange.ParentSeqRanges.Count > 0)
                            {
                                AddParent(current, queryRange.ParentSeqRanges);
                            }
                        }

                        lastQueryRange = queryRange;
                    }
                }
            }

            return intersection;
        }
예제 #6
0
        /// <summary>
        /// For each group in the grouping, this method traverses through each range
        /// in the group and normalizes the ranges down to the minimal spanning set
        /// required to still show the same range spans.
        /// 
        /// For instance if you had in group 'Chr1' the following ranges:
        /// 
        /// -> 10 to 100
        /// -> 200 to 250
        /// -> 35 to 45
        /// -> 90 to 150
        /// 
        /// The result of MergeOverlaps would reduce the ranges in the 'Chr1' group to:
        /// For minOverlap = 0
        /// 
        /// -> 10 to 150
        /// -> 200 to 250
        /// 
        /// for minOverlap = -50
        /// 
        /// -> 10 to 250
        /// 
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="minOverlap">Minimum number of base pairs by which the merged range so far
        /// must overlap the next range (merged End minus next Start) for the two to be merged.
        /// A negative value also merges ranges separated by a gap of up to that size.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from 
        /// which the new sequence range is created are added to the ParentSeqRanges property of the 
        /// new sequence range.</param>
        /// <returns>The overlapped sequence range grouping.</returns>
        public SequenceRangeGrouping MergeOverlaps(long minOverlap = 0, bool isParentSeqRangesRequired = false)
        {
            SequenceRangeGrouping seqRangeGroup = new SequenceRangeGrouping();
            List<ISequenceRange> sortedRanges = new List<ISequenceRange>();
            foreach (List<ISequenceRange> rangeList in this.groups.Values)
            {
                // Sort a private copy; the group's own list keeps its order.
                sortedRanges.Clear();
                sortedRanges.AddRange(rangeList);
                sortedRanges.Sort();

                // An index is advanced over the sorted list instead of repeatedly
                // removing element 0, which would shift the remaining elements on
                // every step and cost quadratic time per group.
                int position = 0;
                while (position < sortedRanges.Count)
                {
                    // Seed a new merged range with the first range not yet consumed.
                    ISequenceRange seed = sortedRanges[position];
                    position++;

                    ISequenceRange seqRange = new SequenceRange(seed.ID, seed.Start, seed.End);
                    if (isParentSeqRangesRequired)
                    {
                        AddParent(seqRange, seed);
                    }

                    seqRangeGroup.Add(seqRange);

                    // Fold in each following range while it overlaps the merged
                    // range by at least minOverlap.
                    while (position < sortedRanges.Count && (seqRange.End - sortedRanges[position].Start) >= minOverlap)
                    {
                        ISequenceRange overlapping = sortedRanges[position];
                        position++;

                        seqRange.End = Math.Max(seqRange.End, overlapping.End);
                        if (isParentSeqRangesRequired)
                        {
                            AddParent(seqRange, overlapping);
                        }
                    }
                }
            }

            return seqRangeGroup;
        }
예제 #7
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        ///
        /// For example,
        ///
        ///  Ranges in this instance   Ranges in the query
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">
        /// Query grouping. Its ranges are merged with MergeOverlaps(0, isParentSeqRangesRequired)
        /// before they are compared against the ranges of this instance.
        /// </param>
        /// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1.</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
        /// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
        /// pieces of intervals along with non overlapping ranges from this instance
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
                                              long minOverlap = 1,
                                              SubtractOutputType outputType  = SubtractOutputType.NonOverlappingPiecesOfIntervals,
                                              bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

            // Trailing pieces already emitted for the current reference range. The entries are the
            // same objects that were added to result, so trimming their End below also changes
            // the emitted piece.
            List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // Snapshot of the query ranges taken before merging; it is consulted below to decide
            // whether a merged query range itself, or its ParentSeqRanges, are recorded as parents.
            IList<ISequenceRange> queryList = null;

            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // merge the query sequence ranges.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                            querySeqRanges,
                                                                                            minOverlap);

                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // Nothing to subtract from this reference range: emit a copy of it.
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervals needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping
                            // ranges from reference sequence range.
                            if (refRange.Start < queryRange.Start)
                            {
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    // NOTE(review): the test below requires BOTH Start and End to differ (&&);
                                    // "not identical" would be ||. Left as in the original - confirm intent.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        // Walk backwards so RemoveAt does not disturb the indices still to visit.
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                // Cut the earlier piece off where this query range begins.
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                // Piece ends before this query range: drop it from the working list
                                                // (it stays in result).
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        // Leading piece: from the start of the reference range up to the query range.
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            if (queryRange.End < refRange.End)
                            {
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                {
                                    // Trailing piece: from the end of the query range to the end of the
                                    // reference range. It is remembered so a later query range can trim it.
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    // No query ranges for this group: every reference range is copied as-is.
                    // Iterate as ISequenceRange (as the overlap branch above does) so that ranges
                    // which are not SequenceRange instances do not raise an InvalidCastException.
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// For each group of this grouping, returns the sequence ranges that overlap with the
        /// ranges of the same group in the specified query SequenceRangeGrouping.
        ///
        /// For instance if you had in group 'Chr1' the following ranges:
        ///
        ///  Ranges in this instance   Ranges in the query
        ///    0 to   10                 20 to   40
        ///   30 to   50                 70 to  100
        ///   60 to   80                400 to  800
        ///  300 to  500                850 to  900
        ///  600 to  700                900 to 1200
        ///  800 to 1000
        ///
        /// Result for minOverlap set to 1
        ///     1. If outputType is OverlappingPiecesOfIntervals.
        ///         30 to 40
        ///         70 to 80
        ///         400 to 500
        ///         600 to 700
        ///         850 to 900
        ///         900 to 1000
        ///     2. If outputType is OverlappingIntervals
        ///          30 to   50
        ///          60 to   80
        ///         300 to  500
        ///         600 to  700
        ///         800 to 1000
        ///
        /// Every range in the returned grouping is a newly created object; the returned
        /// SequenceRangeGrouping is new as well.
        /// </summary>
        /// <param name="query">Query grouping. Its ranges are merged (MergeOverlaps with 0) before comparison.</param>
        /// <param name="minOverlap">Minimum number of base pairs that must overlap.
        /// By default this will be set to 1.</param>
        /// <param name="outputType">
        /// Type of output required, OverlappingPiecesOfIntervals or OverlappingIntervals.
        /// By default this will be set to OverlappingPiecesOfIntervals, that is, only the base pairs that overlap with
        /// query ranges will be returned.</param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence ranges.</param>
        /// <returns>The intersected result.</returns>
        public SequenceRangeGrouping Intersect(SequenceRangeGrouping query, long minOverlap = 1, IntersectOutputType outputType = IntersectOutputType.OverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping intersection = new SequenceRangeGrouping();
            List<ISequenceRange> referenceRanges = new List<ISequenceRange>();
            List<ISequenceRange> mergedQueryRanges = new List<ISequenceRange>();
            SequenceRange current = null;

            // Snapshot of the query ranges before merging; only consulted when parents are recorded.
            IList<ISequenceRange> originalQueryRanges = null;
            if (isParentSeqRangesRequired)
            {
                originalQueryRanges = query.Flatten();
            }

            // Work against the merged form of the query from here on.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            bool piecesRequested = outputType == IntersectOutputType.OverlappingPiecesOfIntervals;

            foreach (string id in groups.Keys)
            {
                referenceRanges.Clear();
                mergedQueryRanges.Clear();
                referenceRanges.AddRange(groups[id]);

                // A group that is absent from the query contributes nothing to the result.
                if (!query.groups.ContainsKey(id))
                {
                    continue;
                }

                mergedQueryRanges.AddRange(query.groups[id]);
                mergedQueryRanges.Sort();

                if (mergedQueryRanges.Count == 0)
                {
                    continue;
                }

                foreach (ISequenceRange refRange in referenceRanges)
                {
                    IList<ISequenceRange> hits = GetOverlappingRenges(refRange, mergedQueryRanges, minOverlap);

                    if (hits == null || hits.Count == 0)
                    {
                        // With a minOverlap of zero or less, whole-interval output still
                        // includes a reference range that has no overlapping query range.
                        if (minOverlap <= 0 && outputType == IntersectOutputType.OverlappingIntervals)
                        {
                            current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(current, refRange);

                            intersection.Add(current);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(current, refRange);
                            }
                        }

                        continue;
                    }

                    ISequenceRange lastQueryRange = null;
                    foreach (ISequenceRange queryRange in hits)
                    {
                        if (piecesRequested)
                        {
                            // Emit a piece unless the previous query range shares a start or an end with this one.
                            bool emitPiece = lastQueryRange == null
                                             || (lastQueryRange.Start != queryRange.Start && lastQueryRange.End != queryRange.End);

                            if (emitPiece)
                            {
                                var pieceStart = Math.Max(queryRange.Start, refRange.Start);
                                var pieceEnd = Math.Min(queryRange.End, refRange.End);
                                current = new SequenceRange(refRange.ID, pieceStart, pieceEnd);

                                intersection.Add(current);
                                CopyOfMetadata(current, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(current, refRange);
                                }
                            }
                        }
                        else if (lastQueryRange == null)
                        {
                            // Whole-interval output: the reference range is copied once, on the first hit.
                            current = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                            CopyOfMetadata(current, refRange);
                            intersection.Add(current);

                            // Without parent tracking the remaining hits add nothing further.
                            if (!isParentSeqRangesRequired)
                            {
                                break;
                            }

                            AddParent(current, refRange);
                        }

                        if (isParentSeqRangesRequired)
                        {
                            // A query range that was in the un-merged snapshot is recorded itself;
                            // otherwise its own ParentSeqRanges (if any) are recorded.
                            if (originalQueryRanges.Contains(queryRange))
                            {
                                AddParent(current, queryRange);
                            }
                            else if (queryRange.ParentSeqRanges.Count > 0)
                            {
                                AddParent(current, queryRange.ParentSeqRanges);
                            }
                        }

                        lastQueryRange = queryRange;
                    }
                }
            }

            return intersection;
        }
예제 #9
0
        /// <summary>
        /// Identifies hot spot chromosome regions: the regions where length anomaly reads
        /// intersect with orphan reads. The length anomaly regions, the orphan regions and
        /// their intersection are written to the console.
        /// </summary>
        /// <param name="filename">
        /// Alignment file to parse. It is parsed with SAMParser when SAMInput is set,
        /// otherwise with BAMParser.
        /// </param>
        /// <param name="mean">
        /// Mean value passed to GetPairedReads. When this or <paramref name="deviation"/> is -1
        /// (the default), the parameterless GetPairedReads overload is used instead.
        /// </param>
        /// <param name="deviation">
        /// Standard deviation passed to GetPairedReads. When this or <paramref name="mean"/> is -1
        /// (the default), the parameterless GetPairedReads overload is used instead.
        /// </param>
        private void IdentifyLentghAnamolies(string filename,
             float mean = -1, float deviation = -1)
        {
            // -1 is the "not supplied" sentinel for both values.
            bool calculateMeanNdeviation = mean == -1 || deviation == -1;

            SequenceAlignmentMap alignmentMapobj = null;

            if (!SAMInput)
            {
                BAMParser bamParser = new BAMParser();
                alignmentMapobj = bamParser.ParseOne<SequenceAlignmentMap>(filename);
            }
            else
            {
                SAMParser samParser = new SAMParser();
                alignmentMapobj = samParser.ParseOne<SequenceAlignmentMap>(filename);
            }

            // get reads from sequence alignment map object.
            IList<PairedRead> pairedReads = null;

            if (calculateMeanNdeviation)
            {
                pairedReads = alignmentMapobj.GetPairedReads();
            }
            else
            {
                pairedReads = alignmentMapobj.GetPairedReads(mean, deviation);
            }

            // Get the orphan regions. Materialised once so that the filter over pairedReads
            // is not re-run for every count and for the loop below.
            List<PairedRead> orphans = pairedReads
                .Where(pr => pr.PairedType == PairedReadType.Orphan)
                .ToList();

            if (orphans.Count == 0)
            {
                Console.WriteLine("No Orphans to display");
            }

            List<ISequenceRange> orphanRegions = new List<ISequenceRange>(orphans.Count);
            foreach (PairedRead orphanRead in orphans)
            {
                orphanRegions.Add(GetRegion(orphanRead.Read1));
            }

            // Get sequence range grouping for Orphan regions.
            SequenceRangeGrouping orphanRangegroup = new SequenceRangeGrouping(orphanRegions);

            // Get the Length anomalies regions (materialised once, as above).
            List<PairedRead> lengthAnomalies = pairedReads
                .Where(pr => pr.PairedType == PairedReadType.LengthAnomaly)
                .ToList();

            if (lengthAnomalies.Count == 0)
            {
                Console.WriteLine("No Anomalies to display");
            }

            List<ISequenceRange> lengthAnomalyRegions = new List<ISequenceRange>(lengthAnomalies.Count);
            foreach (PairedRead laRead in lengthAnomalies)
            {
                // The region starts at the first read's position and spans the insert length.
                SequenceRange range = new SequenceRange();
                range.ID = laRead.Read1.RName;
                range.Start = laRead.Read1.Pos;
                range.End = laRead.Read1.Pos + laRead.InsertLength;
                lengthAnomalyRegions.Add(range);
            }

            // Get sequence range grouping for length anomaly regions.
            SequenceRangeGrouping lengthAnomalyRangegroup =
                                new SequenceRangeGrouping(lengthAnomalyRegions);
            if (!lengthAnomalyRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Length anomalies reads to display");
            }
            else
            {
                Console.Write("Region of length anomaly:");
                DisplaySequenceRange(lengthAnomalyRangegroup);
            }

            if (!orphanRangegroup.GroupIDs.Any())
            {
                Console.Write("\r\nNo Orphan reads to display");
            }
            else
            {
                Console.Write("\r\nRegion of Orphan reads:");
                DisplaySequenceRange(orphanRangegroup);
            }

            // Hot spots are the length anomaly regions that intersect orphan regions
            // (Intersect is called with its default arguments).
            SequenceRangeGrouping intersectedRegions =
                lengthAnomalyRangegroup.Intersect(orphanRangegroup);
            if (!intersectedRegions.GroupIDs.Any())
            {
                Console.Write("\r\nNo Hot spots found");
            }
            else
            {
                Console.Write("\r\nChromosomal Hot spot of length anomaly and Orphan region:");
                DisplaySequenceRange(intersectedRegions);
            }
        }