/// <summary>
/// Reads intervals from a tab-separated .bed file and groups them by chromosome.
/// Data lines carry the fields: chromosome, start, end, chromcountA, chromcountB.
/// When <paramref name="getCN"/> is true, the interval's copy number is set to
/// chromcountA + chromcountB. Empty lines, lines starting with '#', and lines
/// with fewer than 3 fields are skipped.
/// </summary>
/// <param name="oracleBedPath">Path of the .bed file to read.</param>
/// <param name="getCN">If true, fields 4 and 5 are parsed and summed into the interval's CN.</param>
/// <returns>Dictionary from chromosome name to the intervals read for it, in file order.</returns>
protected Dictionary<string, List<CNInterval>> LoadIntervalsFromBed(string oracleBedPath, bool getCN)
{
    // Never switched on in this method, so chromosome names are kept verbatim.
    bool stripChr = false;
    Dictionary<string, List<CNInterval>> intervalsByChromosome = new Dictionary<string, List<CNInterval>>();
    int intervalCount = 0;
    long baseCount = 0;

    using (StreamReader bedReader = new StreamReader(oracleBedPath))
    {
        string line;
        while ((line = bedReader.ReadLine()) != null)
        {
            bool isBlankOrComment = line.Length == 0 || line[0] == '#';
            if (isBlankOrComment)
            {
                continue;
            }

            // Trailing tabs are dropped so they do not produce empty trailing fields.
            string[] fields = line.TrimEnd('\t').Split('\t');
            if (fields.Length < 3)
            {
                continue;
            }

            string chromosome = stripChr ? fields[0].Replace("chr", "") : fields[0];

            List<CNInterval> chromosomeIntervals;
            if (!intervalsByChromosome.TryGetValue(chromosome, out chromosomeIntervals))
            {
                chromosomeIntervals = new List<CNInterval>();
                intervalsByChromosome[chromosome] = chromosomeIntervals;
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(fields[1]);
            interval.End = int.Parse(fields[2]);
            if (getCN)
            {
                // Total copy number is the sum of the two per-chromosome counts.
                interval.CN = int.Parse(fields[3]) + int.Parse(fields[4]);
            }

            baseCount += interval.Length;
            chromosomeIntervals.Add(interval);
            intervalCount++;
        }
    }

    Console.WriteLine(">>>Loaded {0} CN intervals ({1} bases)", intervalCount, baseCount);
    return intervalsByChromosome;
}
/// <summary>
/// Load known CN data from a .bed file. File lines have the tab-separated fields:
/// chromosome, start, end, chromcountA, chromcountB and, optionally, a sixth numeric field
/// that is compared against <paramref name="heterogeneityFraction"/>.
/// The copy number of an interval is the sum of chromcountA and chromcountB.
/// Empty lines, lines starting with '#', and lines with fewer than 3 fields are skipped.
/// All numbers are parsed with the invariant culture, so a value such as "0.5" is read
/// the same way regardless of the machine's regional settings.
/// </summary>
/// <param name="oracleBedPath">Path of the .bed file to read.</param>
/// <param name="getCn">If true, fields 4 and 5 are parsed and summed into the interval's Cn.</param>
/// <param name="heterogeneityFraction">
/// Only consulted when <paramref name="getCn"/> is true. When below 1, an interval whose two
/// chromosome counts are both 1 and whose sixth field is smaller than this value is dropped.
/// </param>
/// <returns>
/// Dictionary from chromosome name to the intervals kept for it, in file order. A chromosome
/// whose intervals were all dropped by the heterogeneity filter still has an (empty) entry.
/// </returns>
protected static Dictionary<string, List<CNInterval>> LoadIntervalsFromBed(string oracleBedPath, bool getCn, double heterogeneityFraction)
{
    bool stripChr = false;
    int count = 0;
    long totalBases = 0;
    // .bed files are machine-readable: never parse them with the user's current culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    Dictionary<string, List<CNInterval>> bedIntervals = new Dictionary<string, List<CNInterval>>();

    using (FileStream stream = new FileStream(oracleBedPath, FileMode.Open, FileAccess.Read))
    using (StreamReader reader = new StreamReader(stream))
    {
        while (true)
        {
            string fileLine = reader.ReadLine();
            if (fileLine == null)
            {
                break;
            }
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.TrimEnd('\t').Split('\t');
            if (bits.Length < 3)
            {
                continue;
            }

            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }

            List<CNInterval> chromosomeIntervals;
            if (!bedIntervals.TryGetValue(chromosome, out chromosomeIntervals))
            {
                chromosomeIntervals = new List<CNInterval>();
                bedIntervals[chromosome] = chromosomeIntervals;
            }

            CNInterval interval = new CNInterval(chromosome);
            interval.Start = int.Parse(bits[1], invariant);
            interval.End = int.Parse(bits[2], invariant);

            if (getCn)
            {
                // Parse each chromosome count once; both the filter and the CN sum need them.
                int countA = int.Parse(bits[3], invariant);
                int countB = int.Parse(bits[4], invariant);

                bool filterApplies = heterogeneityFraction < 1 && bits.Length > 5 && countA == 1 && countB == 1;
                if (filterApplies && heterogeneityFraction > double.Parse(bits[5], invariant))
                {
                    // 1+1 interval whose sixth field is below the requested fraction: skip it.
                    continue;
                }

                interval.Cn = countA + countB;
            }

            totalBases += interval.Length;
            chromosomeIntervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine(">>>Loaded {0} CN intervals ({1} bases)", count, totalBases);
    return bedIntervals;
}
/// <summary>
/// Parses one tab-separated VCF data line into a <c>CNInterval</c>.
/// The chromosome comes from column 1 and the start from column 2. The INFO column
/// (column 8) is scanned for <c>END=</c> and <c>CN=</c> entries; a fractional CN is rounded to
/// an integer, with values written as X.5 rounded up to X+1. If the line also has FORMAT and
/// sample columns (columns 9 and 10), a <c>CN</c> sample value overrides the INFO value.
/// Numbers are parsed with the invariant culture and keys are matched ordinally.
/// </summary>
/// <param name="fileLine">A single non-header VCF line.</param>
/// <returns>The parsed interval.</returns>
/// <exception cref="ArgumentException">
/// Thrown when no non-zero END was found or no non-negative CN was found.
/// </exception>
private static CNInterval ParseCnInterval(string fileLine)
{
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string[] bits = fileLine.Split('\t');
    string chromosome = bits[0];
    CNInterval interval = new CNInterval(chromosome)
    {
        Start = int.Parse(bits[1], invariant),
        Cn = -1 // sentinel: "no CN seen yet"
    };

    string[] infoBits = bits[7].Split(';');
    foreach (string subBit in infoBits)
    {
        if (subBit.StartsWith("CN=", StringComparison.Ordinal))
        {
            float tempCn = float.Parse(subBit.Substring(3), invariant);
            if (subBit.EndsWith(".5", StringComparison.Ordinal))
            {
                // Math.Round uses banker's rounding; nudge X.5 so it always rounds up to X+1.
                interval.Cn = (int)Math.Round(tempCn + 0.1);
            }
            else
            {
                interval.Cn = (int)Math.Round(tempCn);
            }
        }
        if (subBit.StartsWith("END=", StringComparison.Ordinal))
        {
            interval.End = int.Parse(subBit.Substring(4), invariant);
        }
    }

    // Parse CN from Canvas output. Both the FORMAT column (index 8) and the sample column
    // (index 9) must be present, so the line needs at least 10 fields.
    if (bits.Length > 9)
    {
        string[] formatKeys = bits[8].Split(':');
        string[] sampleValues = bits[9].Split(':');
        // The sample column may carry fewer sub-fields than FORMAT; only walk the paired ones.
        int pairedCount = Math.Min(formatKeys.Length, sampleValues.Length);
        for (int index = 0; index < pairedCount; index++)
        {
            if (formatKeys[index] == "CN")
            {
                interval.Cn = int.Parse(sampleValues[index], invariant);
            }
        }
    }

    if (interval.End == 0 || interval.Cn < 0)
    {
        throw new ArgumentException("Invalid record. End cannot be 0 and CN must be >= 0");
    }
    return interval;
}
/// <summary>
/// Replaces <c>KnownCN</c> with the copy-number intervals read from a VCF file opened through
/// <c>GzipReader</c>, grouped by chromosome. Empty lines and lines starting with '#' are skipped.
/// For each record the INFO column (column 8) supplies <c>END=</c> and <c>CN=</c>; a fractional
/// CN is rounded to an integer, with values written as X.5 rounded up to X+1. If FORMAT and
/// sample columns (columns 9 and 10) are present, a <c>CN</c> sample value overrides the INFO
/// value. Records left with End == 0 or a negative CN are reported on the console and not stored.
/// Numbers are parsed with the invariant culture and keys are matched ordinally.
/// </summary>
/// <param name="oracleVCFPath">Path of the VCF file to read.</param>
protected void LoadKnownCNVCF(string oracleVCFPath)
{
    bool stripChr = false;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load our "oracle" of known copy numbers:
    this.KnownCN = new Dictionary<string, List<CNInterval>>();
    int count = 0;
    using (GzipReader reader = new GzipReader(oracleVCFPath))
    {
        while (true)
        {
            string fileLine = reader.ReadLine();
            if (fileLine == null)
            {
                break;
            }
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }
            if (!KnownCN.ContainsKey(chromosome))
            {
                KnownCN[chromosome] = new List<CNInterval>();
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.CN = -1; // sentinel: "no CN seen yet"

            string[] infoBits = bits[7].Split(';');
            foreach (string subBit in infoBits)
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCN = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                    }
                    else
                    {
                        interval.CN = (int)Math.Round(tempCN); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }

            // Parse CN from Canvas output. The sample column is index 9, so at least
            // 10 fields are required before it may be read.
            if (bits.Length > 9)
            {
                string[] formatKeys = bits[8].Split(':');
                string[] sampleValues = bits[9].Split(':');
                // The sample column may carry fewer sub-fields than FORMAT.
                int pairedCount = Math.Min(formatKeys.Length, sampleValues.Length);
                for (int subBitIndex = 0; subBitIndex < pairedCount; subBitIndex++)
                {
                    if (formatKeys[subBitIndex] == "CN")
                    {
                        interval.CN = int.Parse(sampleValues[subBitIndex], invariant);
                    }
                }
            }

            if (interval.End == 0 || interval.CN < 0)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
            }
            else
            {
                KnownCN[chromosome].Add(interval);
                count++;
            }
        }
    }
    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}
/// <summary>
/// Rebuilds <c>KnownCN</c> from a VCF file read through <c>GzipReader</c>: one list of
/// intervals per chromosome. Blank lines and lines beginning with '#' are ignored.
/// END and CN are taken from the INFO column (column 8); a fractional CN is rounded to an
/// integer, and a value written as X.5 is rounded up to X+1. When the record also has FORMAT
/// and sample columns (columns 9 and 10), a <c>CN</c> entry in the sample column takes
/// precedence. A record that ends up with End == 0 or CN &lt; 0 is echoed to the console as
/// bogus and left out. Parsing is culture-invariant and key matching is ordinal.
/// </summary>
/// <param name="oracleVCFPath">Path of the VCF file to read.</param>
protected void LoadKnownCNVCF(string oracleVCFPath)
{
    bool stripChr = false;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load our "oracle" of known copy numbers:
    this.KnownCN = new Dictionary<string, List<CNInterval>>();
    int count = 0;
    using (GzipReader reader = new GzipReader(oracleVCFPath))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }
            if (!KnownCN.ContainsKey(chromosome))
            {
                KnownCN[chromosome] = new List<CNInterval>();
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.CN = -1; // stays negative unless a CN value is found below

            foreach (string subBit in bits[7].Split(';'))
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCN = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                    }
                    else
                    {
                        interval.CN = (int)Math.Round(tempCN); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }

            // Parse CN from Canvas output. Reading bits[9] needs at least 10 fields;
            // a record with FORMAT but no sample column keeps its INFO-derived CN.
            if (bits.Length > 9)
            {
                string[] formatKeys = bits[8].Split(':');
                string[] sampleValues = bits[9].Split(':');
                // Only walk the sub-fields that exist in both columns.
                int pairedCount = Math.Min(formatKeys.Length, sampleValues.Length);
                for (int subBitIndex = 0; subBitIndex < pairedCount; subBitIndex++)
                {
                    if (formatKeys[subBitIndex] == "CN")
                    {
                        interval.CN = int.Parse(sampleValues[subBitIndex], invariant);
                    }
                }
            }

            bool isBogus = interval.End == 0 || interval.CN < 0;
            if (isBogus)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
                continue;
            }

            KnownCN[chromosome].Add(interval);
            count++;
        }
    }
    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}