예제 #1
0
        /// <summary>
        /// Gets the sample bins read from <paramref name="path"/>, grouped by chromosome.
        /// The order of chromosomes in path is preserved (order of first appearance).
        /// Bins must be sorted in ascending order by start position within each chromosome; each bin is
        /// checked against the previously stored bin of the same chromosome, so the check stays correct
        /// even when lines of different chromosomes are interleaved in the file.
        /// </summary>
        /// <param name="path">Path of the bin file; it is read through IterateThroughTextFile.</param>
        /// <returns>OrderedDictionary with chromosome name as key and List&lt;SampleGenomicBin&gt; as value</returns>
        /// <exception cref="Exception">A bin starts before the preceding bin of the same chromosome.</exception>
        public static OrderedDictionary <string, List <SampleGenomicBin> > GetGenomicBinsByChrom(string path)
        {
            var binsByChrom = new OrderedDictionary <string, List <SampleGenomicBin> >();

            foreach (var bin in IterateThroughTextFile(path))
            {
                string chrom = bin.GenomicBin.Chromosome;

                // Fetch (or create) the list for this chromosome once per bin.
                List <SampleGenomicBin> chromBins;
                if (binsByChrom.ContainsKey(chrom))
                {
                    chromBins = binsByChrom[chrom];
                }
                else
                {
                    chromBins = new List <SampleGenomicBin>();
                    binsByChrom[chrom] = chromBins;
                }

                // Compare against the last bin stored for THIS chromosome rather than the previous line
                // of the file, which may belong to a different chromosome.
                if (chromBins.Count > 0 && bin.Start < chromBins[chromBins.Count - 1].Start)
                {
                    throw new Exception("Bins are not sorted in ascending order by the start position." +
                                        $" First offending bin: {bin.GenomicBin.Chromosome}\t{bin.Start}\t{bin.Stop}");
                }

                chromBins.Add(bin);
            }

            return binsByChrom;
        }
예제 #2
0
        /// <summary>
        /// Loads the intervals of a tab-delimited BED file, grouped by the sequence name found in the first column.
        /// The second and third columns are parsed as the interval start and stop.
        /// A one-line summary (number of intervals and sequences loaded) is written to the console.
        /// </summary>
        /// <param name="bedPath">Path of the BED file to read.</param>
        /// <param name="gcIndex">
        /// Optional zero-based index of a column holding an integer that is stored in GenomicBin.GC.
        /// Lines that do not have that many columns leave GC untouched.
        /// </param>
        /// <returns>Dictionary mapping each sequence name to its intervals, in file order.</returns>
        /// <exception cref="Illumina.Common.IlluminaException">
        /// A line has fewer than three tab-separated fields, a negative start, or a start that is not less than its stop.
        /// </exception>
        public static Dictionary <string, List <SampleGenomicBin> > LoadBedFile(string bedPath, int? gcIndex = null)
        {
            // BED is a machine-readable format: parse numbers independently of the current thread culture.
            var culture = System.Globalization.CultureInfo.InvariantCulture;
            var intervalsByChrom = new Dictionary <string, List <SampleGenomicBin> >();
            int count = 0;

            using (FileStream stream = new FileStream(bedPath, FileMode.Open, FileAccess.Read))
                using (StreamReader reader = new StreamReader(stream))
                {
                    string fileLine;
                    while ((fileLine = reader.ReadLine()) != null)
                    {
                        string[] bits = fileLine.Split('\t');
                        if (bits.Length < 3)
                        {
                            // Report short/blank lines as a format error instead of an IndexOutOfRangeException.
                            throw new Illumina.Common.IlluminaException(String.Format("Expected at least 3 tab-separated fields in a BED file: {0}", fileLine));
                        }

                        string chr = bits[0];
                        List <SampleGenomicBin> intervals;
                        if (!intervalsByChrom.TryGetValue(chr, out intervals))
                        {
                            intervals = new List <SampleGenomicBin>();
                            intervalsByChrom[chr] = intervals;
                        }

                        SampleGenomicBin interval = new SampleGenomicBin();
                        interval.GenomicBin.Chromosome = chr;
                        interval.Start = int.Parse(bits[1], culture);
                        interval.Stop  = int.Parse(bits[2], culture);
                        if (interval.Start < 0)
                        {
                            throw new Illumina.Common.IlluminaException(String.Format("Start must be non-negative in a BED file: {0}", fileLine));
                        }
                        if (interval.Start >= interval.Stop) // Do not allow empty intervals
                        {
                            throw new Illumina.Common.IlluminaException(String.Format("Start must be less than Stop in a BED file: {0}", fileLine));
                        }
                        if (gcIndex.HasValue && gcIndex.Value < bits.Length)
                        {
                            interval.GenomicBin.GC = int.Parse(bits[gcIndex.Value], culture);
                        }

                        intervals.Add(interval);
                        count++;
                    }
                }
            Console.WriteLine(">>> Loaded {0} intervals for {1} sequences", count, intervalsByChrom.Keys.Count);
            return intervalsByChrom;
        }
예제 #3
0
        /// <summary>
        /// Lazily reads a tab-delimited bin file through GzipReader and yields one SampleGenomicBin per line.
        /// The first five columns are read as chromosome, start, stop, count and GC, in that order.
        /// Numeric fields are parsed with the invariant culture so that the result does not depend on the
        /// regional settings of the machine (e.g. "1.5" is always one and a half).
        /// </summary>
        /// <param name="infile">Path of the file to read.</param>
        /// <returns>A sequence of bins in file order; the file is only read while the sequence is enumerated.</returns>
        public static IEnumerable <SampleGenomicBin> IterateThroughTextFile(string infile)
        {
            // NOTE(review): assumes the file was written with invariant ('.' decimal) formatting — confirm against the writer.
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            using (GzipReader reader = new GzipReader(infile))
            {
                string row;

                while ((row = reader.ReadLine()) != null)
                {
                    string[] fields = row.Split('\t');

                    string chr   = fields[0];
                    int    start = Convert.ToInt32(fields[1], culture);
                    int    stop  = Convert.ToInt32(fields[2], culture);
                    float  count = float.Parse(fields[3], culture);
                    int    gc    = Convert.ToInt32(fields[4], culture);

                    // The constructor takes GC before count, unlike the column order in the file.
                    yield return new SampleGenomicBin(chr, start, stop, gc, count);
                }
            }
        }
예제 #4
0
 /// <summary>
 /// Tells whether <paramref name="bin"/> has the same chromosome, interval start and interval end as this bin.
 /// </summary>
 /// <param name="bin">The bin to compare with this one.</param>
 /// <returns>true when chromosome, start and end all match; otherwise false.</returns>
 public bool IsSameBin(SampleGenomicBin bin)
 {
     // Checks run in the same order as a short-circuiting && chain: chromosome, then start, then end.
     if (GenomicBin.Chromosome == bin.GenomicBin.Chromosome)
     {
         if (GenomicBin.Interval.Start == bin.GenomicBin.Interval.Start)
         {
             return GenomicBin.Interval.End == bin.GenomicBin.Interval.End;
         }
     }

     return false;
 }
예제 #5
0
 /// <summary>
 /// Convenience overload: forwards the chromosome, start and stop of <paramref name="bin"/>
 /// to the SkipBin overload that takes those three values.
 /// </summary>
 /// <param name="bin">The bin whose coordinates are forwarded.</param>
 /// <returns>Whatever the coordinate-based SkipBin overload returns for this bin.</returns>
 public bool SkipBin(SampleGenomicBin bin)
 {
     var chromosome = bin.GenomicBin.Chromosome;
     uint start = (uint)bin.Start;
     uint stop = (uint)bin.Stop;

     return SkipBin(chromosome, start, stop);
 }