/// <summary>
/// Intersect bins with the targeted regions defined in callset.Manifest.
/// The existing file at partitionedPath is moved to partitionedPath.AppendName(".raw") and a new file,
/// containing one line per (bin, targeted region) intersection, is written at partitionedPath.
/// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location and the bins don't intersect.
/// </summary>
/// <param name="callset">Callset whose Manifest supplies the targeted regions, grouped by chromosome.</param>
/// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
/// <returns>partitionedPath itself; returned untouched when the file does not exist.</returns>
private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
{
    if (!partitionedPath.Exists)
    {
        return partitionedPath;
    }

    // Move the unfiltered input aside (replacing any stale copy) so the filtered
    // result can be written under the original name.
    var rawPartitionedPath = partitionedPath.AppendName(".raw");
    if (rawPartitionedPath.Exists)
    {
        rawPartitionedPath.Delete();
    }
    partitionedPath.MoveTo(rawPartitionedPath);

    Dictionary<string, List<NexteraManifest.ManifestRegion>> manifestRegionsByChrom =
        callset.Manifest.GetManifestRegionsByChromosome();

    // Coordinates are machine-readable data: parse and format them culture-independently.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // CanvasPartition output file is in the BED format
    //   start: 0-based, inclusive
    //   end: 0-based, exclusive
    // Manifest
    //   start: 1-based, inclusive
    //   end: 1-based, inclusive
    using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
    using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
    {
        string currentChrom = null;
        // Targeted regions of the current chromosome; null when the manifest has none for it.
        // NOTE(review): the sweep below presumes each list is ordered by position - confirm
        // against GetManifestRegionsByChromosome.
        List<NexteraManifest.ManifestRegion> regions = null;
        int manifestRegionIdx = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');
            string chrom = toks[0];
            int start = int.Parse(toks[1], invariantCulture) + 1; // 1-based, inclusive
            int end = int.Parse(toks[2], invariantCulture); // 1-based, inclusive

            if (chrom != currentChrom)
            {
                // New chromosome: restart the sweep over its regions.
                // TryGetValue leaves regions null when the chromosome is not in the manifest.
                currentChrom = chrom;
                manifestRegionIdx = 0;
                manifestRegionsByChrom.TryGetValue(chrom, out regions);
            }
            if (regions == null)
            {
                continue; // no targeted regions on this chromosome
            }

            // |- manifest region -|   |- bin -|
            // Skip regions that end before the bin starts.
            while (manifestRegionIdx < regions.Count && regions[manifestRegionIdx].End < start)
            {
                manifestRegionIdx++;
            }

            if (manifestRegionIdx >= regions.Count || // |- last manifest region -|   |- bin -|
                end < regions[manifestRegionIdx].Start) // |- bin -|   |- manifest region -|
            {
                continue; // skip bin
            }

            //     |- bin -|
            // |- manifest region -|
            while (manifestRegionIdx < regions.Count && end >= regions[manifestRegionIdx].Start)
            {
                var region = regions[manifestRegionIdx];

                // calculate intersection
                int intersectionStart = Math.Max(start, region.Start); // 1-based, inclusive
                int intersectionEnd = Math.Min(end, region.End); // 1-based, inclusive

                // start/end in BED format
                toks[1] = (intersectionStart - 1).ToString(invariantCulture); // 0-based, inclusive
                toks[2] = intersectionEnd.ToString(invariantCulture); // 0-based, exclusive

                // write intersected bin
                writer.WriteLine(String.Join("\t", toks));

                if (region.End > end)
                {
                    // The region continues past this bin, so it may also overlap the next bin.
                    // Keep it as the current region instead of consuming it; advancing here
                    // would silently drop every later bin that falls inside the same region.
                    break;
                }
                manifestRegionIdx++;
            }
        }
    }

    return partitionedPath;
}
/// <summary>
/// Static entry point that forwards to the MoveTo instance method of <paramref name="source"/>.
/// Deliberately a plain static method rather than an extension method, so that it does not
/// collide with that instance method while still being callable statically.
/// </summary>
/// <param name="source">Location whose MoveTo instance method is invoked.</param>
/// <param name="destination">Location handed to source.MoveTo.</param>
public static void MoveTo(IFileLocation source, IFileLocation destination) => source.MoveTo(destination);
/// <summary>
/// Intersect bins with the targeted regions defined in callset.Manifest.
/// The existing file at partitionedPath is moved to partitionedPath.AppendName(".raw") and a new file,
/// containing one line per (bin, targeted region) intersection, is written at partitionedPath.
/// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location and the bins don't intersect.
/// </summary>
/// <param name="callset">Callset whose Manifest supplies the targeted regions, grouped by chromosome.</param>
/// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
/// <returns>partitionedPath itself; returned untouched when the file does not exist.</returns>
private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
{
    if (!partitionedPath.Exists)
    {
        return partitionedPath;
    }

    // Move the unfiltered input aside (replacing any stale copy) so the filtered
    // result can be written under the original name.
    var rawPartitionedPath = partitionedPath.AppendName(".raw");
    if (rawPartitionedPath.Exists)
    {
        rawPartitionedPath.Delete();
    }
    partitionedPath.MoveTo(rawPartitionedPath);

    Dictionary<string, List<NexteraManifest.ManifestRegion>> manifestRegionsByChrom =
        callset.Manifest.GetManifestRegionsByChromosome();

    // Coordinates are machine-readable data: parse and format them culture-independently.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // CanvasPartition output file is in the BED format
    //   start: 0-based, inclusive
    //   end: 0-based, exclusive
    // Manifest
    //   start: 1-based, inclusive
    //   end: 1-based, inclusive
    using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
    using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
    {
        string currentChrom = null;
        // Targeted regions of the current chromosome; null when the manifest has none for it.
        // NOTE(review): the sweep below presumes each list is ordered by position - confirm
        // against GetManifestRegionsByChromosome.
        List<NexteraManifest.ManifestRegion> regions = null;
        int manifestRegionIdx = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');
            string chrom = toks[0];
            int start = int.Parse(toks[1], invariantCulture) + 1; // 1-based, inclusive
            int end = int.Parse(toks[2], invariantCulture); // 1-based, inclusive

            if (chrom != currentChrom)
            {
                // New chromosome: restart the sweep over its regions.
                // TryGetValue leaves regions null when the chromosome is not in the manifest.
                currentChrom = chrom;
                manifestRegionIdx = 0;
                manifestRegionsByChrom.TryGetValue(chrom, out regions);
            }
            if (regions == null)
            {
                continue; // no targeted regions on this chromosome
            }

            // |- manifest region -|   |- bin -|
            // Skip regions that end before the bin starts.
            while (manifestRegionIdx < regions.Count && regions[manifestRegionIdx].End < start)
            {
                manifestRegionIdx++;
            }

            if (manifestRegionIdx >= regions.Count || // |- last manifest region -|   |- bin -|
                end < regions[manifestRegionIdx].Start) // |- bin -|   |- manifest region -|
            {
                continue; // skip bin
            }

            //     |- bin -|
            // |- manifest region -|
            while (manifestRegionIdx < regions.Count && end >= regions[manifestRegionIdx].Start)
            {
                var region = regions[manifestRegionIdx];

                // calculate intersection
                int intersectionStart = Math.Max(start, region.Start); // 1-based, inclusive
                int intersectionEnd = Math.Min(end, region.End); // 1-based, inclusive

                // start/end in BED format
                toks[1] = (intersectionStart - 1).ToString(invariantCulture); // 0-based, inclusive
                toks[2] = intersectionEnd.ToString(invariantCulture); // 0-based, exclusive

                // write intersected bin
                writer.WriteLine(String.Join("\t", toks));

                if (region.End > end)
                {
                    // The region continues past this bin, so it may also overlap the next bin.
                    // Keep it as the current region instead of consuming it; advancing here
                    // would silently drop every later bin that falls inside the same region.
                    break;
                }
                manifestRegionIdx++;
            }
        }
    }

    return partitionedPath;
}