/// <summary>
/// Reads the bins in <paramref name="path"/> and groups them by chromosome.
/// The order of chromosomes in the file is preserved in the returned dictionary.
/// </summary>
/// <remarks>
/// Bins must be sorted in ascending order by start position within each chromosome.
/// Each bin is checked against the previous bin of the SAME chromosome, so the check
/// stays correct even when the bins of a chromosome are split into several runs in the file.
/// </remarks>
/// <param name="path">Path of the bin file; it is read through IterateThroughTextFile.</param>
/// <returns>Ordered dictionary mapping each chromosome name to its bins, in file order.</returns>
/// <exception cref="Exception">
/// A bin starts before the preceding bin of the same chromosome.
/// </exception>
public static OrderedDictionary<string, List<SampleGenomicBin>> GetGenomicBinsByChrom(string path)
{
    var binsByChrom = new OrderedDictionary<string, List<SampleGenomicBin>>();

    foreach (var bin in IterateThroughTextFile(path))
    {
        var chrom = bin.GenomicBin.Chromosome;

        // Fetch (or create) the list for this chromosome once and reuse it below.
        List<SampleGenomicBin> chromBins;
        if (binsByChrom.ContainsKey(chrom))
        {
            chromBins = binsByChrom[chrom];
        }
        else
        {
            chromBins = new List<SampleGenomicBin>();
            binsByChrom[chrom] = chromBins;
        }

        // Compare against the last bin stored for this chromosome rather than the last
        // bin read from the file, which may belong to a different chromosome.
        if (chromBins.Count > 0 && bin.Start < chromBins[chromBins.Count - 1].Start)
        {
            throw new Exception("Bins are not sorted in ascending order by the start position." +
                                $" First offending bin: {bin.GenomicBin.Chromosome}\t{bin.Start}\t{bin.Stop}");
        }

        chromBins.Add(bin);
    }

    return binsByChrom;
}
/// <summary>
/// Loads the intervals of a tab-delimited BED file, grouped by chromosome.
/// Prints the number of intervals and sequences loaded to the console.
/// </summary>
/// <param name="bedPath">Path of the BED file to read.</param>
/// <param name="gcIndex">
/// Optional zero-based index of a column holding an integer GC value. The column is parsed
/// only when this is supplied and the line has that many columns; otherwise GC is not set.
/// </param>
/// <returns>Dictionary mapping each chromosome name to its intervals, in file order.</returns>
/// <exception cref="Illumina.Common.IlluminaException">
/// A line has fewer than three columns, a negative start, or a start that is not less than its stop.
/// </exception>
public static Dictionary<string, List<SampleGenomicBin>> LoadBedFile(string bedPath, int? gcIndex = null)
{
    var excludedIntervals = new Dictionary<string, List<SampleGenomicBin>>();
    int count = 0;

    using (FileStream stream = new FileStream(bedPath, FileMode.Open, FileAccess.Read))
    using (StreamReader reader = new StreamReader(stream))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            string[] bits = fileLine.Split('\t');

            // Report short (or blank) lines explicitly instead of failing with an
            // IndexOutOfRangeException that does not identify the offending line.
            if (bits.Length < 3)
            {
                throw new Illumina.Common.IlluminaException(String.Format("Expected at least 3 tab-delimited columns (chromosome, start, stop) in a BED file: {0}", fileLine));
            }

            string chr = bits[0];

            SampleGenomicBin interval = new SampleGenomicBin();
            interval.GenomicBin.Chromosome = chr;
            interval.Start = int.Parse(bits[1]);
            interval.Stop = int.Parse(bits[2]);

            if (interval.Start < 0)
            {
                throw new Illumina.Common.IlluminaException(String.Format("Start must be non-negative in a BED file: {0}", fileLine));
            }

            // Do not allow empty intervals
            if (interval.Start >= interval.Stop)
            {
                throw new Illumina.Common.IlluminaException(String.Format("Start must be less than Stop in a BED file: {0}", fileLine));
            }

            if (gcIndex.HasValue && gcIndex.Value < bits.Length)
            {
                interval.GenomicBin.GC = int.Parse(bits[gcIndex.Value]);
            }

            // Single lookup: reuse the chromosome's list when present, create it otherwise.
            List<SampleGenomicBin> chrIntervals;
            if (!excludedIntervals.TryGetValue(chr, out chrIntervals))
            {
                chrIntervals = new List<SampleGenomicBin>();
                excludedIntervals[chr] = chrIntervals;
            }

            chrIntervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine(">>> Loaded {0} intervals for {1} sequences", count, excludedIntervals.Keys.Count);
    return excludedIntervals;
}
/// <summary>
/// Lazily yields one <see cref="SampleGenomicBin"/> per line of a tab-delimited file
/// opened with GzipReader.
/// </summary>
/// <remarks>
/// Each line must provide at least five columns, read in this order: chromosome, start,
/// stop, count, GC. Numeric columns are parsed with Convert.ToInt32 / float.Parse, which
/// use the current culture. The reader is disposed when enumeration finishes or the
/// enumerator is disposed.
/// </remarks>
/// <param name="infile">Path of the file to read.</param>
/// <returns>A deferred sequence of bins in file order.</returns>
public static IEnumerable<SampleGenomicBin> IterateThroughTextFile(string infile)
{
    using (var input = new GzipReader(infile))
    {
        for (string line = input.ReadLine(); line != null; line = input.ReadLine())
        {
            string[] columns = line.Split('\t');

            // Columns are converted in file order so a malformed line fails on its
            // first bad column.
            string chromosome = columns[0];
            int binStart = Convert.ToInt32(columns[1]);
            int binStop = Convert.ToInt32(columns[2]);
            float binCount = float.Parse(columns[3]);
            int binGc = Convert.ToInt32(columns[4]);

            yield return new SampleGenomicBin(chromosome, binStart, binStop, binGc, binCount);
        }
    }
}
/// <summary>
/// Tells whether <paramref name="bin"/> has the same chromosome, interval start and
/// interval end as this bin.
/// </summary>
/// <param name="bin">The bin to compare with this instance.</param>
/// <returns>true when all three values are equal; otherwise false.</returns>
public bool IsSameBin(SampleGenomicBin bin)
{
    // Checks run in the order chromosome, start, end and stop at the first mismatch.
    bool sameChromosome = GenomicBin.Chromosome == bin.GenomicBin.Chromosome;
    if (!sameChromosome)
    {
        return false;
    }

    bool sameStart = GenomicBin.Interval.Start == bin.GenomicBin.Interval.Start;
    if (!sameStart)
    {
        return false;
    }

    bool sameEnd = GenomicBin.Interval.End == bin.GenomicBin.Interval.End;
    return sameEnd;
}
/// <summary>
/// Convenience overload: forwards the chromosome, start and stop of <paramref name="bin"/>
/// to the SkipBin overload that takes them as separate arguments.
/// </summary>
/// <param name="bin">The bin whose chromosome and coordinates are forwarded.</param>
/// <returns>The result of the coordinate-based SkipBin overload.</returns>
public bool SkipBin(SampleGenomicBin bin)
{
    var chromosome = bin.GenomicBin.Chromosome;

    // The coordinate-based overload is called with unsigned positions.
    uint binStart = (uint)bin.Start;
    uint binStop = (uint)bin.Stop;

    return SkipBin(chromosome, binStart, binStop);
}