Ejemplo n.º 1
0
        /// <summary>
        /// Intersect bins with the targeted regions defined in callset.Manifest.
        /// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location and the bins don't intersect.
        /// </summary>
        /// <param name="callset"></param>
        /// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
        /// <returns></returns>
        private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
        {
            if (!partitionedPath.Exists) { return partitionedPath; }
            var rawPartitionedPath = partitionedPath.AppendName(".raw");
            if (rawPartitionedPath.Exists) { rawPartitionedPath.Delete(); }
            partitionedPath.MoveTo(rawPartitionedPath);

            //callset.Manifest
            Dictionary<string, List<NexteraManifest.ManifestRegion>> manifestRegionsByChrom = callset.Manifest.GetManifestRegionsByChromosome();

            // CanvasPartition output file is in the BED format
            //   start: 0-based, inclusive
            //   end: 0-based, exclusive
            // Manifest
            //   start: 1-based, inclusive
            //   end: 1-based, inclusive
            using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
            using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
            {
                string currentChrom = null;
                int manifestRegionIdx = 0;
                string line;
                string[] toks;
                while ((line = reader.ReadLine()) != null)
                {
                    toks = line.Split('\t');
                    string chrom = toks[0];
                    int start = int.Parse(toks[1]) + 1; // 1-based, inclusive
                    int end = int.Parse(toks[2]); // 1-based, inclusive
                    if (chrom != currentChrom)
                    {
                        currentChrom = chrom;
                        manifestRegionIdx = 0;
                    }
                    if (!manifestRegionsByChrom.ContainsKey(currentChrom)) { continue; }
                    while (manifestRegionIdx < manifestRegionsByChrom[currentChrom].Count
                        && manifestRegionsByChrom[currentChrom][manifestRegionIdx].End < start) // |- manifest region -| |- bin -|
                    {
                        manifestRegionIdx++;
                    }
                    if (manifestRegionIdx >= manifestRegionsByChrom[currentChrom].Count || // |- last manifest region -| |- bin -|
                        end < manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start) // |- bin -| |- manifest region -|
                    {
                        continue; // skip bin
                    }

                    // |- bin -|
                    //       |- manifest region -|
                    while (manifestRegionIdx < manifestRegionsByChrom[currentChrom].Count &&
                        end >= manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start)
                    {
                        // calculate intersection
                        int intersectionStart = Math.Max(start, manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start); // 1-based, inclusive
                        int intersectionEnd = Math.Min(end, manifestRegionsByChrom[currentChrom][manifestRegionIdx].End); // 1-based, inclusive
                                                                                                                          // start/end in BED format
                        toks[1] = String.Format("{0}", intersectionStart - 1); // 0-based, inclusive
                        toks[2] = String.Format("{0}", intersectionEnd); // 0-based, exclusive

                        // write intersected bin
                        writer.WriteLine(String.Join("\t", toks));

                        manifestRegionIdx++;
                    }
                }
            }

            return partitionedPath;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Not making this an extension method so that it doesn't collide with the instance method
 /// We still want to be able to refer to it statically though
 /// </summary>
 /// <param name="source"></param>
 /// <param name="destination"></param>
 public static void MoveTo(IFileLocation source, IFileLocation destination)
 {
     source.MoveTo(destination);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Intersect bins with the targeted regions defined in callset.Manifest.
        /// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location and the bins don't intersect.
        /// </summary>
        /// <param name="callset"></param>
        /// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
        /// <returns></returns>
        private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
        {
            if (!partitionedPath.Exists)
            {
                return(partitionedPath);
            }
            var rawPartitionedPath = partitionedPath.AppendName(".raw");

            if (rawPartitionedPath.Exists)
            {
                rawPartitionedPath.Delete();
            }
            partitionedPath.MoveTo(rawPartitionedPath);

            //callset.Manifest
            Dictionary <string, List <NexteraManifest.ManifestRegion> > manifestRegionsByChrom = callset.Manifest.GetManifestRegionsByChromosome();

            // CanvasPartition output file is in the BED format
            //   start: 0-based, inclusive
            //   end: 0-based, exclusive
            // Manifest
            //   start: 1-based, inclusive
            //   end: 1-based, inclusive
            using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
                using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
                {
                    string   currentChrom      = null;
                    int      manifestRegionIdx = 0;
                    string   line;
                    string[] toks;
                    while ((line = reader.ReadLine()) != null)
                    {
                        toks = line.Split('\t');
                        string chrom = toks[0];
                        int    start = int.Parse(toks[1]) + 1; // 1-based, inclusive
                        int    end   = int.Parse(toks[2]);     // 1-based, inclusive
                        if (chrom != currentChrom)
                        {
                            currentChrom      = chrom;
                            manifestRegionIdx = 0;
                        }
                        if (!manifestRegionsByChrom.ContainsKey(currentChrom))
                        {
                            continue;
                        }
                        while (manifestRegionIdx < manifestRegionsByChrom[currentChrom].Count &&
                               manifestRegionsByChrom[currentChrom][manifestRegionIdx].End < start) // |- manifest region -| |- bin -|
                        {
                            manifestRegionIdx++;
                        }
                        if (manifestRegionIdx >= manifestRegionsByChrom[currentChrom].Count ||   // |- last manifest region -| |- bin -|
                            end < manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start) // |- bin -| |- manifest region -|
                        {
                            continue;                                                            // skip bin
                        }

                        // |- bin -|
                        //       |- manifest region -|
                        while (manifestRegionIdx < manifestRegionsByChrom[currentChrom].Count &&
                               end >= manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start)
                        {
                            // calculate intersection
                            int intersectionStart = Math.Max(start, manifestRegionsByChrom[currentChrom][manifestRegionIdx].Start); // 1-based, inclusive
                            int intersectionEnd   = Math.Min(end, manifestRegionsByChrom[currentChrom][manifestRegionIdx].End);     // 1-based, inclusive
                                                                                                                                    // start/end in BED format
                            toks[1] = String.Format("{0}", intersectionStart - 1);                                                  // 0-based, inclusive
                            toks[2] = String.Format("{0}", intersectionEnd);                                                        // 0-based, exclusive

                            // write intersected bin
                            writer.WriteLine(String.Join("\t", toks));

                            manifestRegionIdx++;
                        }
                    }
                }

            return(partitionedPath);
        }