Example #1
0
        /// <summary>
        /// Runs one BED operation (Merge, Intersect or Subtract variants) on the reference
        /// and query BED files named in the test XML and validates the resulting range IDs,
        /// start indices and end indices against the expected comma-separated values.
        /// </summary>
        /// <param name="nodeName">Xml Node name for different inputs.</param>
        /// <param name="operationPam">Different Bed operations.</param>
        /// <param name="overlappingBasePair">
        /// When true, Intersect uses OverlappingPiecesOfIntervals and Subtract uses
        /// IntervalsWithNoOverlap; when false, OverlappingIntervals and
        /// NonOverlappingPiecesOfIntervals are used respectively.
        /// </param>
        /// <param name="IsParentSeqRangeRequired">Is Parent Sequence Range required?</param>
        void ValidateBedOperations(string nodeName,
                                   BedOperationsParameters operationPam,
                                   bool overlappingBasePair, bool IsParentSeqRangeRequired)
        {
            // Get values from xml.
            string expectedRangeIDs = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.IDNode);
            string expectedStartIndex = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.StartNode);
            string expectedEndIndex = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.EndNode);
            string referenceFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string queryFilePath = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.QueryFilePath);
            string minimalOverlap = utilityObj.xmlUtil.GetTextValue(
                nodeName, Constants.OverlapValue);

            SequenceRangeGrouping operationResult = null;

            // Parse the reference and query BED files.
            BedParser             parserObj      = new BedParser();
            SequenceRangeGrouping referenceGroup = parserObj.ParseRangeGrouping(referenceFilePath);
            SequenceRangeGrouping queryGroup     = parserObj.ParseRangeGrouping(queryFilePath);

            // The same flag selects the output flavour of both Intersect and Subtract.
            IntersectOutputType intersectOutputType = overlappingBasePair
                ? IntersectOutputType.OverlappingPiecesOfIntervals
                : IntersectOutputType.OverlappingIntervals;

            SubtractOutputType subtractOutputType = overlappingBasePair
                ? SubtractOutputType.IntervalsWithNoOverlap
                : SubtractOutputType.NonOverlappingPiecesOfIntervals;

            switch (operationPam)
            {
            case BedOperationsParameters.Merge:
                operationResult = referenceGroup.MergeOverlaps();
                break;

            case BedOperationsParameters.MergeWithPam:
                operationResult = referenceGroup.MergeOverlaps(queryGroup,
                                                               0, IsParentSeqRangeRequired);
                break;

            case BedOperationsParameters.Intersect:
                operationResult = referenceGroup.Intersect(queryGroup,
                                                           long.Parse(minimalOverlap, (IFormatProvider)null), intersectOutputType, IsParentSeqRangeRequired);
                break;

            case BedOperationsParameters.MergeQueryWithReference:
                operationResult = queryGroup.MergeOverlaps(referenceGroup,
                                                           0, IsParentSeqRangeRequired);
                break;

            case BedOperationsParameters.Subtract:
                operationResult = referenceGroup.Subtract(queryGroup,
                                                          long.Parse(minimalOverlap, (IFormatProvider)null), subtractOutputType, IsParentSeqRangeRequired);
                break;

            default:
                // No operation ran, so there is no result to validate. Fail with a clear
                // message instead of dereferencing a null result below.
                Assert.Fail("Unsupported BED operation: " + operationPam);
                break;
            }

            string[] expectedRangeIdsArray   = expectedRangeIDs.Split(',');
            string[] expectedStartIndexArray = expectedStartIndex.Split(',');
            string[] expectedEndIndexArray   = expectedEndIndex.Split(',');
            int      i = 0;

            // Walk every group of the result; the expected values are consumed in the
            // order the ranges are enumerated.
            foreach (string grpID in operationResult.GroupIDs)
            {
                List <ISequenceRange> rangeList = operationResult.GetGroup(grpID);

                // Validate result sequence range.
                foreach (ISequenceRange range in rangeList)
                {
                    // Report a surplus of result ranges as an assertion failure rather
                    // than an IndexOutOfRangeException on the expected arrays.
                    if (i >= expectedRangeIdsArray.Length ||
                        i >= expectedStartIndexArray.Length ||
                        i >= expectedEndIndexArray.Length)
                    {
                        Assert.Fail("The operation produced more sequence ranges than the expected values listed for node '" + nodeName + "'.");
                    }

                    Assert.AreEqual(expectedRangeIdsArray[i], range.ID);
                    Assert.AreEqual(expectedStartIndexArray[i], range.Start.ToString((IFormatProvider)null));
                    Assert.AreEqual(expectedEndIndexArray[i], range.End.ToString((IFormatProvider)null));
                    i++;
                }
            }

            // Validate ParentSeqRange.
            bool result = ValidateParentSeqRange(operationResult, referenceGroup,
                                                 queryGroup, IsParentSeqRangeRequired);
            Assert.IsTrue(result);

            ApplicationLog.WriteLine(
                "Bed Operations BVT: Successfully validated the BED SequenceID, Start and End Ranges");
        }
Example #2
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        /// 
        /// For example,
        /// 
        ///  Ranges in this instance   Ranges in the query 
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///    
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///    
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///   
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals. 
        /// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping 
        /// pieces of intervals along with non overlapping ranges from this instance 
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from 
        /// which the new sequence range is created are added to the ParentSeqRanges property of the 
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
            long minOverlap = 1,
            SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals,
            bool isParentSeqRangesRequired = false)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result = new SequenceRangeGrouping();
            List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
            List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();
            // Trailing pieces emitted for the current reference range; later query
            // ranges may trim their End (see the backwards loop below).
            List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();
            SequenceRange range = null;

            // Capture the query's ranges (as returned by Flatten) BEFORE merging, so the
            // parent-tracking code below can tell whether a merged query range is one of
            // the original ranges or a range produced by the merge.
            IList<ISequenceRange> queryList = null;
            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // Rebind the local to the merged query; the caller's reference is not reassigned.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            // Process this instance one group ID at a time.
            foreach (string id in groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(groups[id]);

                // Collect and sort the merged query ranges that share this group ID, if any.
                if (query.groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query.groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();
                        IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                            querySeqRanges,
                                                                                            minOverlap);

                        // Nothing in the query overlaps this reference range.
                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // Copy the reference range to the result unless minOverlap is
                            // not positive and only IntervalsWithNoOverlap were requested.
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervals needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping 
                            // ranges from reference sequence range.
                            if (refRange.Start < queryRange.Start)
                            {
                                // A trailing piece from an earlier query range starts before
                                // this query range: trim it instead of emitting a new piece.
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    // NOTE(review): && binds tighter than ||, so this reads
                                    // "prev == null || (starts differ && ends differ)". The comment
                                    // above suggests "starts differ || ends differ" may have been
                                    // intended - confirm before changing.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        // Iterate backwards so RemoveAt does not shift the
                                        // indices that are still to be visited.
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                // Piece runs into the query range: cut it at the query start.
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                // Piece ends before the query range: stop tracking it
                                                // (it remains in the result).
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    // NOTE(review): previousSeqRanges[0] is read after the loop above
                                    // may have removed entries - presumably at least one piece
                                    // always survives; confirm.
                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            // Query range is one of the original (un-merged) ranges.
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            // Query range came from the merge: record its own parents instead.
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    // Emit the leading piece: reference start up to the query start.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    // "range" is the piece created just above, or the one left over
                                    // from an earlier iteration when the condition above was false.
                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            // The reference range extends past this query range: emit the trailing
                            // piece (query end up to reference end) and remember it so that a later
                            // query range can trim it.
                            if (queryRange.End < refRange.End)
                            {
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                {
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    // The query has no ranges for this group ID: copy every reference range.
                    // NOTE(review): the foreach variable type performs an explicit cast of each
                    // ISequenceRange to SequenceRange, which throws InvalidCastException for any
                    // other implementation - confirm that is intended.
                    foreach (SequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return result;
        }
Example #3
0
        /// <summary>
        /// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
        ///
        /// For example,
        ///
        ///  Ranges in this instance   Ranges in the query
        ///     1 to  4                   2 to  6
        ///     4 to  8                   3 to  6
        ///     8 to 12                   9 to 14
        ///    25 to 35
        ///
        /// Result for minOverlap set to 1
        /// 1. If outputType is IntervalsWithNoOverlap
        ///    25 to 35
        ///
        /// 2. If outputType is NonOverlappingPiecesOfIntervals
        ///    1 to  2
        ///    6 to  8
        ///    8 to  9
        ///   25 to 35
        ///
        /// Running this method creates all new ISequenceRange objects and adds them
        /// to the newly created SequenceRangeGrouping returned here.
        /// </summary>
        /// <param name="query">Query grouping.</param>
        /// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
        /// <param name="outputType">
        /// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
        /// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
        /// pieces of intervals along with non overlapping ranges from this instance
        /// will be returned.
        /// </param>
        /// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
        /// which the new sequence range is created are added to the ParentSeqRanges property of the
        /// new sequence range.</param>
        /// <returns>The resultant Sequence range grouping.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
        public SequenceRangeGrouping Subtract(SequenceRangeGrouping query,
                                              long minOverlap = 1,
                                              SubtractOutputType outputType  = SubtractOutputType.NonOverlappingPiecesOfIntervals,
                                              bool isParentSeqRangesRequired = false)
        {
            // Reject a null query up front instead of failing with a
            // NullReferenceException on the first member access below.
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }

            SequenceRangeGrouping result            = new SequenceRangeGrouping();
            List <ISequenceRange> refSeqRanges      = new List <ISequenceRange>();
            List <ISequenceRange> querySeqRanges    = new List <ISequenceRange>();
            // Trailing pieces emitted for the current reference range; later query
            // ranges may trim their End (see the backwards loop below).
            List <ISequenceRange> previousSeqRanges = new List <ISequenceRange>();
            SequenceRange         range             = null;

            // Capture the query's ranges (as returned by Flatten) BEFORE merging, so the
            // parent-tracking code below can tell whether a merged query range is one of
            // the original ranges or a range produced by the merge.
            IList <ISequenceRange> queryList = null;

            if (isParentSeqRangesRequired)
            {
                queryList = query.Flatten();
            }

            // Rebind the local to the merged query; the caller's reference is not reassigned.
            query = query.MergeOverlaps(0, isParentSeqRangesRequired);

            // Process this instance one group ID at a time.
            foreach (string id in _groups.Keys)
            {
                refSeqRanges.Clear();
                querySeqRanges.Clear();

                refSeqRanges.AddRange(_groups[id]);

                // Collect and sort the merged query ranges that share this group ID, if any.
                if (query._groups.ContainsKey(id))
                {
                    querySeqRanges.AddRange(query._groups[id]);
                    querySeqRanges.Sort();
                }

                if (querySeqRanges.Count > 0)
                {
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        previousSeqRanges.Clear();
                        IList <ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange,
                                                                                             querySeqRanges,
                                                                                             minOverlap);

                        // Nothing in the query overlaps this reference range.
                        if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                        {
                            // Copy the reference range to the result unless minOverlap is
                            // not positive and only IntervalsWithNoOverlap were requested.
                            if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }

                                result.Add(range);
                            }

                            continue;
                        }

                        // no need to proceed if only non overlapping intervals needed.
                        if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                        {
                            continue;
                        }

                        ISequenceRange previousOverlappingRange = null;
                        foreach (ISequenceRange queryRange in overlappingQueryRanges)
                        {
                            // in case of non overlapping pieces of intervals get the non overlapping
                            // ranges from reference sequence range.
                            if (refRange.Start < queryRange.Start)
                            {
                                // A trailing piece from an earlier query range starts before
                                // this query range: trim it instead of emitting a new piece.
                                if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                                {
                                    // if the previous overlapping range's start and end are equal then no need to change the metadataSeqRanges.
                                    // NOTE(review): && binds tighter than ||, so this reads
                                    // "prev == null || (starts differ && ends differ)". The comment
                                    // above suggests "starts differ || ends differ" may have been
                                    // intended - confirm before changing.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        // Iterate backwards so RemoveAt does not shift the
                                        // indices that are still to be visited.
                                        for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                        {
                                            if (previousSeqRanges[i].End > queryRange.Start)
                                            {
                                                // Piece runs into the query range: cut it at the query start.
                                                previousSeqRanges[i].End = queryRange.Start;
                                            }
                                            else if (previousSeqRanges[i].End < queryRange.Start)
                                            {
                                                // Piece ends before the query range: stop tracking it
                                                // (it remains in the result).
                                                previousSeqRanges.RemoveAt(i);
                                            }
                                        }
                                    }

                                    // NOTE(review): previousSeqRanges[0] is read after the loop above
                                    // may have removed entries - presumably at least one piece
                                    // always survives; confirm.
                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            // Query range is one of the original (un-merged) ranges.
                                            AddParent(previousSeqRanges[0], queryRange);
                                        }
                                        else
                                        {
                                            // Query range came from the merge: record its own parents instead.
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                                else
                                {
                                    // Emit the leading piece: reference start up to the query start.
                                    if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                    {
                                        range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                        result.Add(range);
                                        CopyOfMetadata(range, refRange);

                                        if (isParentSeqRangesRequired)
                                        {
                                            AddParent(range, refRange);
                                        }
                                    }

                                    // "range" is the piece created just above, or the one left over
                                    // from an earlier iteration when the condition above was false.
                                    if (isParentSeqRangesRequired)
                                    {
                                        if (queryList.Contains(queryRange))
                                        {
                                            AddParent(range, queryRange);
                                        }
                                        else
                                        {
                                            if (queryRange.ParentSeqRanges.Count > 0)
                                            {
                                                AddParent(range, queryRange.ParentSeqRanges);
                                            }
                                        }
                                    }
                                }
                            }

                            // The reference range extends past this query range: emit the trailing
                            // piece (query end up to reference end) and remember it so that a later
                            // query range can trim it.
                            if (queryRange.End < refRange.End)
                            {
                                if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                                {
                                    range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                                    CopyOfMetadata(range, refRange);

                                    result.Add(range);
                                    previousSeqRanges.Add(range);

                                    if (isParentSeqRangesRequired)
                                    {
                                        AddParent(range, refRange);
                                    }
                                }

                                if (isParentSeqRangesRequired)
                                {
                                    if (queryList.Contains(queryRange))
                                    {
                                        AddParent(range, queryRange);
                                    }
                                    else
                                    {
                                        if (queryRange.ParentSeqRanges.Count > 0)
                                        {
                                            AddParent(range, queryRange.ParentSeqRanges);
                                        }
                                    }
                                }
                            }

                            previousOverlappingRange = queryRange;
                        }
                    }
                }
                else
                {
                    // The query has no ranges for this group ID: copy every reference range.
                    // Iterate as ISequenceRange (the list's element type) so that no hidden
                    // cast to SequenceRange can throw InvalidCastException; only ID, Start
                    // and End are read, exactly as in the overlapping branch above.
                    foreach (ISequenceRange refRange in refSeqRanges)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);
                        result.Add(range);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }
                    }
                }
            }

            return(result);
        }
Example #4
0
        /// <summary>
        /// Subtracts the query BED ranges from the reference BED ranges named in the test
        /// XML and validates the resulting range IDs, start indices and end indices against
        /// the expected comma-separated values.
        /// </summary>
        /// <param name="nodeName">Xml Node name for different inputs.</param>
        /// <param name="overlappingBasePair">
        /// When true the subtraction uses IntervalsWithNoOverlap; otherwise
        /// NonOverlappingPiecesOfIntervals.
        /// </param>
        /// <param name="IsParentSeqRangeRequired">Is Parent Sequence Range required?</param>
        void SubtractSequenceRange(string nodeName,
                                   bool overlappingBasePair, bool IsParentSeqRangeRequired)
        {
            // Get values from xml.
            string[] expectedRangeIDs = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.IDNode).Split(',');
            string[] expectedStartIndex = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.StartNode).Split(',');
            string[] expectedEndIndex = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.EndNode).Split(',');
            string referenceFilePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.FilePathNode);
            string queryFilePath = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.QueryFilePath);
            string minimalOverlap = _utilityObj._xmlUtil.GetTextValue(
                nodeName, Constants.OverlapValue);

            // Parse the reference and query BED files.
            BedParser             parserObj      = new BedParser();
            SequenceRangeGrouping referenceGroup = parserObj.ParseRangeGrouping(referenceFilePath);
            SequenceRangeGrouping queryGroup     = parserObj.ParseRangeGrouping(queryFilePath);

            SubtractOutputType subtractOutputType = overlappingBasePair
                ? SubtractOutputType.IntervalsWithNoOverlap
                : SubtractOutputType.NonOverlappingPiecesOfIntervals;

            // Subtract a SequenceRangeGroup.
            SequenceRangeGrouping subtractGroup = referenceGroup.Subtract(queryGroup,
                                                                          long.Parse(minimalOverlap, (IFormatProvider)null), subtractOutputType, IsParentSeqRangeRequired);

            // Expected values are consumed in the order the subtracted ranges are enumerated.
            int j = 0;

            // Walk every group ID of the subtracted grouping.
            foreach (string grpID in subtractGroup.GroupIDs)
            {
                List <ISequenceRange> rangeList = subtractGroup.GetGroup(grpID);

                // Validate subtracted sequence range.
                foreach (ISequenceRange range in rangeList)
                {
                    // Report a surplus of result ranges as an assertion failure rather
                    // than an IndexOutOfRangeException on the expected arrays.
                    if (j >= expectedStartIndex.Length ||
                        j >= expectedEndIndex.Length ||
                        j >= expectedRangeIDs.Length)
                    {
                        Assert.Fail("Subtract produced more sequence ranges than the expected values listed for node '" + nodeName + "'.");
                    }

                    Assert.AreEqual(expectedStartIndex[j], range.Start.ToString((IFormatProvider)null));
                    Assert.AreEqual(expectedEndIndex[j], range.End.ToString((IFormatProvider)null));
                    Assert.AreEqual(expectedRangeIDs[j], range.ID.ToString((IFormatProvider)null));
                    j++;
                }
            }

            // Validate ParentSeqRanges.
            bool result = ValidateParentSeqRange(
                subtractGroup, referenceGroup, queryGroup, IsParentSeqRangeRequired);
            Assert.IsTrue(result);

            ApplicationLog.WriteLine(
                "Bed Parser BVT: Successfully validated the subtract SequeID, Start and End Ranges");
            // The message has no format placeholders, so it is written directly; the
            // emitted text is unchanged.
            Console.WriteLine(
                "Bed Parser BVT: Successfully validated the subtracted SequeID, Start and End Ranges");
        }