/// <summary>
/// Validate BED Operations (Merge, Intersect, Subtract).
/// Runs the requested operation on the reference/query BED files named in the
/// given xml node and compares every resulting range (ID, Start, End) with the
/// comma separated expected values stored in the same node.
/// </summary>
/// <param name="nodeName">Xml Node name for different inputs.</param>
/// <param name="operationPam">Different Bed operations.</param>
/// <param name="overlappingBasePair">
/// When true, Intersect is run with OverlappingPiecesOfIntervals and Subtract with
/// IntervalsWithNoOverlap; otherwise OverlappingIntervals and
/// NonOverlappingPiecesOfIntervals are used respectively.
/// </param>
/// <param name="IsParentSeqRangeRequired">Is Parent Sequence Range required?</param>
void ValidateBedOperations(string nodeName,
    BedOperationsParameters operationPam,
    bool overlappingBasePair, bool IsParentSeqRangeRequired)
{
    // Get values from xml.
    string expectedRangeIDs = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.IDNode);
    string expectedStartIndex = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.StartNode);
    string expectedEndIndex = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.EndNode);
    string referenceFilePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string queryFilePath = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.QueryFilePath);
    string minimalOverlap = utilityObj.xmlUtil.GetTextValue(
        nodeName, Constants.OverlapValue);

    // Parse the reference and the query BED files.
    BedParser parserObj = new BedParser();
    SequenceRangeGrouping referenceGroup =
        parserObj.ParseRangeGrouping(referenceFilePath);
    SequenceRangeGrouping queryGroup =
        parserObj.ParseRangeGrouping(queryFilePath);

    // The same flag selects the output type of both Intersect and Subtract.
    IntersectOutputType intersectOutputType = overlappingBasePair
        ? IntersectOutputType.OverlappingPiecesOfIntervals
        : IntersectOutputType.OverlappingIntervals;
    SubtractOutputType subtractOutputType = overlappingBasePair
        ? SubtractOutputType.IntervalsWithNoOverlap
        : SubtractOutputType.NonOverlappingPiecesOfIntervals;

    SequenceRangeGrouping operationResult = null;

    // The overlap value is parsed only inside the cases that use it, so a node
    // without a numeric overlap value still works for the merge operations.
    switch (operationPam)
    {
        case BedOperationsParameters.Merge:
            operationResult = referenceGroup.MergeOverlaps();
            break;
        case BedOperationsParameters.MergeWithPam:
            operationResult = referenceGroup.MergeOverlaps(queryGroup,
                0, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Intersect:
            operationResult = referenceGroup.Intersect(queryGroup,
                long.Parse(minimalOverlap, (IFormatProvider)null),
                intersectOutputType, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.MergeQueryWithReference:
            operationResult = queryGroup.MergeOverlaps(referenceGroup,
                0, IsParentSeqRangeRequired);
            break;
        case BedOperationsParameters.Subtract:
            operationResult = referenceGroup.Subtract(queryGroup,
                long.Parse(minimalOverlap, (IFormatProvider)null),
                subtractOutputType, IsParentSeqRangeRequired);
            break;
        default:
            // Fail with a readable message instead of dereferencing a null
            // result further down.
            Assert.Fail("Unsupported BED operation: " + operationPam);
            break;
    }

    // Get a result SequenceGroup Id.
    IEnumerable<string> groupId = operationResult.GroupIDs;

    string[] expectedRangeIdsArray = expectedRangeIDs.Split(',');
    string[] expectedStartIndexArray = expectedStartIndex.Split(',');
    string[] expectedEndIndexArray = expectedEndIndex.Split(',');

    // Running index into the expected arrays; it continues across groups
    // because the expected values are stored as one flat list.
    int i = 0;
    foreach (string grpID in groupId)
    {
        List<ISequenceRange> rangeList = operationResult.GetGroup(grpID);

        // Validate result sequence range.
        foreach (ISequenceRange range in rangeList)
        {
            Assert.AreEqual(expectedRangeIdsArray[i], range.ID);
            Assert.AreEqual(expectedStartIndexArray[i],
                range.Start.ToString((IFormatProvider)null));
            Assert.AreEqual(expectedEndIndexArray[i],
                range.End.ToString((IFormatProvider)null));
            i++;
        }
    }

    // Validate ParentSeqRange.
    Assert.IsTrue(ValidateParentSeqRange(operationResult,
        referenceGroup, queryGroup, IsParentSeqRangeRequired));

    ApplicationLog.WriteLine(
        "Bed Operations BVT: Successfully validated the BED SequenceID, Start and End Ranges");
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance    Ranges in the query
///     1 to  4                    2 to  6
///     4 to  8                    3 to  6
///     8 to 12                    9 to 14
///    25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///      25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping. Must not be null.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();

    // Scratch lists, cleared and reused for every group / reference range.
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Pieces created from the tail of the current reference range
    // (queryRange.End to refRange.End); later overlapping query ranges trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Most recently created output range. It is declared outside the loops, so
    // it keeps its value between iterations.
    SequenceRange range = null;

    // merge the query sequence ranges.
    // The flattened list is captured BEFORE merging so that, further down, a
    // query range can be recognised as an original range (added as a parent
    // itself) or as a merged one (its ParentSeqRanges are added instead).
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // From here on the 'query' parameter refers to the merged grouping.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(groups[id]);

        if (query.groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query.groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                // Nothing in the query overlaps this reference range.
                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    // The reference range is copied to the result unchanged, except
                    // when minOverlap is 0 (or less) and only IntervalsWithNoOverlap
                    // were requested.
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // if the previous overlapping range's start and end are equal then no need to change the previousSeqRanges.
                            // '&&' binds tighter than '||': this reads as
                            // prev == null || (start differs && end differs).
                            // NOTE(review): the negation of "start and end are equal" would be
                            // "start differs || end differs" - confirm '&&' is intended.
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                // Walk backwards so RemoveAt does not disturb the indexes still to be visited.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        // Trim the earlier tail piece so it stops where this query range starts.
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                // NOTE(review): previousSeqRanges[0] is read after the loop above may
                                // have removed entries - confirm the list cannot be empty here.
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            // No earlier tail piece to trim: emit the head piece
                            // refRange.Start to queryRange.Start.
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            // When the guard above was false, 'range' still refers to the
                            // most recently created range and receives the parents.
                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    // The reference range extends past the query range: emit the tail
                    // piece queryRange.End to refRange.End and remember it so that the
                    // next overlapping query range can trim it.
                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                        {
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group ID: copy every reference range as is.
            // The foreach casts each ISequenceRange element to SequenceRange.
            foreach (SequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Subtracts the query SequenceRangeGrouping from this SequenceRangeGrouping.
///
/// For example,
///
///  Ranges in this instance    Ranges in the query
///     1 to  4                    2 to  6
///     4 to  8                    3 to  6
///     8 to 12                    9 to 14
///    25 to 35
///
/// Result for minOverlap set to 1
///  1. If outputType is IntervalsWithNoOverlap
///       25 to 35
///
///  2. If outputType is NonOverlappingPiecesOfIntervals
///       1 to 2
///       6 to 8
///       8 to 9
///      25 to 35
///
/// Running this method creates all new ISequenceRange objects and adds them
/// to the newly created SequenceRangeGrouping returned here.
/// </summary>
/// <param name="query">Query grouping. Must not be null.</param>
/// <param name="minOverlap">Minimum length of overlap. By default this will be set to 1</param>
/// <param name="outputType">
/// Type of output required, IntervalsWithNoOverlap or NonOverlappingPiecesOfIntervals.
/// By default this will be set to NonOverlappingPiecesOfIntervals, that is, non overlapping
/// pieces of intervals along with non overlapping ranges from this instance
/// will be returned.
/// </param>
/// <param name="isParentSeqRangesRequired">If this flag is set to true then the sequence ranges from
/// which the new sequence range is created are added to the ParentSeqRanges property of the
/// new sequence range.</param>
/// <returns>The resultant Sequence range grouping.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="query"/> is null.</exception>
public SequenceRangeGrouping Subtract(SequenceRangeGrouping query, long minOverlap = 1, SubtractOutputType outputType = SubtractOutputType.NonOverlappingPiecesOfIntervals, bool isParentSeqRangesRequired = false)
{
    // Reject a null query up front; otherwise the first use of 'query' below
    // fails with a NullReferenceException that does not name the argument.
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    SequenceRangeGrouping result = new SequenceRangeGrouping();

    // Scratch lists, cleared and reused for every group / reference range.
    List<ISequenceRange> refSeqRanges = new List<ISequenceRange>();
    List<ISequenceRange> querySeqRanges = new List<ISequenceRange>();

    // Tail pieces (queryRange.End to refRange.End) created for the current
    // reference range; later overlapping query ranges trim them.
    List<ISequenceRange> previousSeqRanges = new List<ISequenceRange>();

    // Most recently created output range; it keeps its value between iterations.
    SequenceRange range = null;

    // merge the query sequence ranges.
    // The flattened list is captured BEFORE merging so that a query range can
    // later be recognised as an original range (added as a parent itself) or
    // as a merged one (its ParentSeqRanges are added instead).
    IList<ISequenceRange> queryList = null;
    if (isParentSeqRangesRequired)
    {
        queryList = query.Flatten();
    }

    // From here on the 'query' parameter refers to the merged grouping.
    query = query.MergeOverlaps(0, isParentSeqRangesRequired);

    foreach (string id in _groups.Keys)
    {
        refSeqRanges.Clear();
        querySeqRanges.Clear();

        refSeqRanges.AddRange(_groups[id]);

        if (query._groups.ContainsKey(id))
        {
            querySeqRanges.AddRange(query._groups[id]);
            querySeqRanges.Sort();
        }

        if (querySeqRanges.Count > 0)
        {
            foreach (ISequenceRange refRange in refSeqRanges)
            {
                previousSeqRanges.Clear();
                IList<ISequenceRange> overlappingQueryRanges = GetOverlappingRenges(refRange, querySeqRanges, minOverlap);

                // Nothing in the query overlaps this reference range.
                if (overlappingQueryRanges == null || overlappingQueryRanges.Count == 0)
                {
                    if (minOverlap > 0 || outputType == SubtractOutputType.NonOverlappingPiecesOfIntervals)
                    {
                        range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                        CopyOfMetadata(range, refRange);

                        if (isParentSeqRangesRequired)
                        {
                            AddParent(range, refRange);
                        }

                        result.Add(range);
                    }

                    continue;
                }

                // no need to proceed if only non overlapping intervals needed.
                if (outputType == SubtractOutputType.IntervalsWithNoOverlap)
                {
                    continue;
                }

                ISequenceRange previousOverlappingRange = null;
                foreach (ISequenceRange queryRange in overlappingQueryRanges)
                {
                    // in case of non overlapping pieces of intervals get the non overlapping
                    // ranges from reference sequence range.
                    if (refRange.Start < queryRange.Start)
                    {
                        if (previousSeqRanges.Count > 0 && previousSeqRanges[0].Start < queryRange.Start)
                        {
                            // if the previous overlapping range's start and end are equal then no need to change the previousSeqRanges.
                            // '&&' binds tighter than '||': prev == null || (start differs && end differs).
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                // Walk backwards so RemoveAt does not disturb the indexes still to be visited.
                                for (int i = previousSeqRanges.Count - 1; i >= 0; i--)
                                {
                                    if (previousSeqRanges[i].End > queryRange.Start)
                                    {
                                        // Trim the earlier tail piece so it stops where this query range starts.
                                        previousSeqRanges[i].End = queryRange.Start;
                                    }
                                    else if (previousSeqRanges[i].End < queryRange.Start)
                                    {
                                        previousSeqRanges.RemoveAt(i);
                                    }
                                }
                            }

                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(previousSeqRanges[0], queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(previousSeqRanges[0], queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                        else
                        {
                            // No earlier tail piece to trim: emit the head piece
                            // refRange.Start to queryRange.Start.
                            if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                            {
                                range = new SequenceRange(refRange.ID, refRange.Start, queryRange.Start);
                                result.Add(range);
                                CopyOfMetadata(range, refRange);

                                if (isParentSeqRangesRequired)
                                {
                                    AddParent(range, refRange);
                                }
                            }

                            // When the guard above was false, 'range' still refers to the
                            // most recently created range and receives the parents.
                            if (isParentSeqRangesRequired)
                            {
                                if (queryList.Contains(queryRange))
                                {
                                    AddParent(range, queryRange);
                                }
                                else
                                {
                                    if (queryRange.ParentSeqRanges.Count > 0)
                                    {
                                        AddParent(range, queryRange.ParentSeqRanges);
                                    }
                                }
                            }
                        }
                    }

                    // The reference range extends past the query range: emit the tail
                    // piece and remember it so the next query range can trim it.
                    if (queryRange.End < refRange.End)
                    {
                        if (previousOverlappingRange == null || previousOverlappingRange.Start != queryRange.Start && previousOverlappingRange.End != queryRange.End)
                        {
                            range = new SequenceRange(refRange.ID, queryRange.End, refRange.End);
                            CopyOfMetadata(range, refRange);
                            result.Add(range);
                            previousSeqRanges.Add(range);

                            if (isParentSeqRangesRequired)
                            {
                                AddParent(range, refRange);
                            }
                        }

                        if (isParentSeqRangesRequired)
                        {
                            if (queryList.Contains(queryRange))
                            {
                                AddParent(range, queryRange);
                            }
                            else
                            {
                                if (queryRange.ParentSeqRanges.Count > 0)
                                {
                                    AddParent(range, queryRange.ParentSeqRanges);
                                }
                            }
                        }
                    }

                    previousOverlappingRange = queryRange;
                }
            }
        }
        else
        {
            // The query has no ranges for this group ID: copy every reference range as is.
            foreach (SequenceRange refRange in refSeqRanges)
            {
                range = new SequenceRange(refRange.ID, refRange.Start, refRange.End);
                CopyOfMetadata(range, refRange);
                result.Add(range);

                if (isParentSeqRangesRequired)
                {
                    AddParent(range, refRange);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Validate Subtract SequenceRangeGrouping.
/// Subtracts the query BED file from the reference BED file named in the xml
/// node and compares each resulting range with the expected Start, End and ID
/// values stored (comma separated) in the same node.
/// </summary>
/// <param name="nodeName">Xml Node name for different inputs.</param>
/// <param name="overlappingBasePair">
/// When true the subtraction is run with IntervalsWithNoOverlap, otherwise with
/// NonOverlappingPiecesOfIntervals.
/// </param>
/// <param name="IsParentSeqRangeRequired">Is Parent Sequence Range required?</param>
void SubtractSequenceRange(string nodeName,
    bool overlappingBasePair, bool IsParentSeqRangeRequired)
{
    // Expected values and input file locations from the xml node.
    string[] wantedIds = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.IDNode).Split(',');
    string[] wantedStarts = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.StartNode).Split(',');
    string[] wantedEnds = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.EndNode).Split(',');
    string referencePath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.FilePathNode);
    string queryPath = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.QueryFilePath);
    string overlapText = _utilityObj._xmlUtil.GetTextValue(
        nodeName, Constants.OverlapValue);

    // Parse both BED files with the same parser instance.
    BedParser bedParser = new BedParser();
    SequenceRangeGrouping referenceGroup = bedParser.ParseRangeGrouping(referencePath);
    SequenceRangeGrouping queryGroup = bedParser.ParseRangeGrouping(queryPath);

    SubtractOutputType outputKind = overlappingBasePair
        ? SubtractOutputType.IntervalsWithNoOverlap
        : SubtractOutputType.NonOverlappingPiecesOfIntervals;

    // Subtract the query group from the reference group.
    SequenceRangeGrouping subtractGroup = referenceGroup.Subtract(
        queryGroup,
        long.Parse(overlapText, (IFormatProvider)null),
        outputKind,
        IsParentSeqRangeRequired);

    // Walk every group of the subtracted result; the expected values form one
    // flat list, so the index keeps counting across groups.
    int expectedIndex = 0;
    IEnumerable<string> resultGroupIds = subtractGroup.GroupIDs;
    foreach (string groupId in resultGroupIds)
    {
        List<ISequenceRange> groupRanges = subtractGroup.GetGroup(groupId);

        // Validate each subtracted sequence range.
        foreach (ISequenceRange current in groupRanges)
        {
            Assert.AreEqual(wantedStarts[expectedIndex],
                current.Start.ToString((IFormatProvider)null));
            Assert.AreEqual(wantedEnds[expectedIndex],
                current.End.ToString((IFormatProvider)null));
            Assert.AreEqual(wantedIds[expectedIndex],
                current.ID.ToString((IFormatProvider)null));
            expectedIndex++;
        }
    }

    // Validate ParentSeqRanges.
    bool parentsValid = ValidateParentSeqRange(
        subtractGroup, referenceGroup, queryGroup, IsParentSeqRangeRequired);
    Assert.IsTrue(parentsValid);

    ApplicationLog.WriteLine(
        "Bed Parser BVT: Successfully validated the subtract SequeID, Start and End Ranges");
    Console.WriteLine(string.Format((IFormatProvider)null,
        "Bed Parser BVT: Successfully validated the subtracted SequeID, Start and End Ranges"));
}