/// <summary>
/// Reads known ("truth") copy-number intervals from a tab-delimited .bed file into <c>KnownCN</c>,
/// replacing whatever dictionary was stored there before.
/// Each data line carries the fields: chromosome, start, end, chromcountA, chromcountB.
/// The copy number stored for an interval is chromcountA + chromcountB.
/// Empty lines and lines whose first character is '#' are skipped.
/// </summary>
/// <param name="oracleBedPath">Path of the .bed file to read.</param>
protected void LoadKnownCNBed(string oracleBedPath)
{
    // Hard-coded switch (currently off): when true, every "chr" substring is removed from chromosome names.
    bool stripChr = false;
    int loadedIntervals = 0;
    this.KnownCN = new Dictionary<string, List<CNInterval>>();

    using (StreamReader bedReader = new StreamReader(oracleBedPath))
    {
        string line;
        while ((line = bedReader.ReadLine()) != null)
        {
            bool isBlankOrComment = line.Length == 0 || line[0] == '#';
            if (isBlankOrComment)
            {
                continue;
            }

            string[] fields = line.Split('\t');
            string chromosomeName = stripChr ? fields[0].Replace("chr", "") : fields[0];

            // Fetch (or lazily create) the interval list for this chromosome before parsing the coordinates.
            List<CNInterval> chromosomeIntervals;
            if (!KnownCN.TryGetValue(chromosomeName, out chromosomeIntervals))
            {
                chromosomeIntervals = new List<CNInterval>();
                KnownCN[chromosomeName] = chromosomeIntervals;
            }

            CNInterval knownInterval = new CNInterval();
            knownInterval.Start = int.Parse(fields[1]);
            knownInterval.End = int.Parse(fields[2]);
            // Total copy number is the sum of the two per-chromosome counts.
            knownInterval.CN = int.Parse(fields[3]) + int.Parse(fields[4]);

            chromosomeIntervals.Add(knownInterval);
            loadedIntervals++;
        }
    }

    Console.WriteLine(">>>Loaded {0} known-CN intervals", loadedIntervals);
}
/// <summary>
/// Load known CN data from a tab-delimited .bed file into <c>KnownCN</c> (any previous content is replaced).
/// File lines have fields:
/// chromosome, start, end, chromcountA, chromcountB [, heterogeneity]
/// The copy number stored for an interval is the sum of chromcountA and chromcountB.
/// When the optional sixth field is absent, the interval's Heterogeneity is set to -1.0.
/// Empty or whitespace-only lines and lines starting with '#' are skipped.
/// All numbers are parsed with the invariant culture, so "0.5" means one half regardless of the machine's locale.
/// </summary>
/// <param name="oracleBedPath">Path of the .bed file to read.</param>
protected void LoadKnownCNBed(string oracleBedPath)
{
    // Hard-coded switch (currently off): when true, every "chr" substring is removed from chromosome names.
    bool stripChr = false;
    int count = 0;
    // The file is machine-readable data: never parse it with the user's UI culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    this.KnownCN = new Dictionary<string, List<CNInterval>>();

    using (FileStream stream = new FileStream(oracleBedPath, FileMode.Open, FileAccess.Read))
    using (StreamReader reader = new StreamReader(stream))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            if (bits.Length == 1 && bits[0].Trim().Length == 0)
            {
                continue; // skip whitespace-only lines, same as the VCF loader does
            }

            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }

            // Single dictionary lookup instead of ContainsKey + indexer.
            List<CNInterval> intervals;
            if (!KnownCN.TryGetValue(chromosome, out intervals))
            {
                intervals = new List<CNInterval>();
                KnownCN[chromosome] = intervals;
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.End = int.Parse(bits[2], invariant);
            interval.CN = int.Parse(bits[3], invariant) + int.Parse(bits[4], invariant);
            if (bits.Length > 5)
            {
                interval.Heterogeneity = double.Parse(bits[5], invariant);
            }
            else
            {
                interval.Heterogeneity = -1.0; // marker for "no heterogeneity column in the file"
            }

            intervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}
/// <summary>
/// Loads known copy-number intervals from a tab-delimited VCF file (opened through GzipReader) into
/// <c>KnownCN</c>, replacing any previous content.
/// For each data line: column 1 is the chromosome, column 2 the start position, and column 8 (INFO) is
/// scanned for "CN=" (copy number, rounded to an integer with X.5 rounded up) and "END=" (end position).
/// If both a FORMAT column (9) and a sample column (10) are present, a "CN" entry there overrides the INFO value.
/// Records that are too short, have no END, or have no CN are echoed to the console as bogus and not stored.
/// Empty lines and lines starting with '#' are skipped.
/// </summary>
/// <param name="oracleVCFPath">Path of the VCF file to read.</param>
protected void LoadKnownCNVCF(string oracleVCFPath)
{
    // Hard-coded switch (currently off): when true, every "chr" substring is removed from chromosome names.
    bool stripChr = false;
    // The file is machine-readable data: never parse it with the user's UI culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load our "oracle" of known copy numbers:
    this.KnownCN = new Dictionary<string, List<CNInterval>>();
    int count = 0;
    using (GzipReader reader = new GzipReader(oracleVCFPath))
    {
        while (true)
        {
            string fileLine = reader.ReadLine();
            if (fileLine == null)
            {
                break;
            }
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }
            string[] bits = fileLine.Split('\t');
            if (bits.Length == 1 && bits[0].Trim().Length == 0)
            {
                continue; // skip empty lines!
            }

            // The INFO column (index 7) is mandatory for us; anything shorter cannot be interpreted.
            if (bits.Length < 8)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
                continue;
            }

            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }
            List<CNInterval> intervals;
            if (!KnownCN.TryGetValue(chromosome, out intervals))
            {
                intervals = new List<CNInterval>();
                KnownCN[chromosome] = intervals;
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.CN = -1; // sentinel: no copy number found yet

            string[] infoBits = bits[7].Split(';');
            foreach (string subBit in infoBits)
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCN = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                    }
                    else
                    {
                        interval.CN = (int)Math.Round(tempCN); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }

            // Parse CN from Canvas output. Both the FORMAT column (index 8) and the sample column (index 9)
            // must exist; a record with FORMAT but no sample column is left with its INFO-derived CN.
            if (bits.Length > 9)
            {
                string[] formatKeys = bits[8].Split(':');
                string[] sampleValues = bits[9].Split(':');
                // The sample column may carry fewer sub-fields than FORMAT declares; never index past it.
                int pairedFields = Math.Min(formatKeys.Length, sampleValues.Length);
                for (int fieldIndex = 0; fieldIndex < pairedFields; fieldIndex++)
                {
                    if (formatKeys[fieldIndex] == "CN")
                    {
                        interval.CN = int.Parse(sampleValues[fieldIndex], invariant);
                    }
                }
            }

            if (interval.End == 0 || interval.CN < 0)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
            }
            else
            {
                intervals.Add(interval);
                count++;
            }
        }
    }
    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}
/// <summary>
/// Loads known copy-number intervals from a tab-delimited VCF file (opened through GzipReader) into
/// <c>KnownCN</c>, replacing any previous content.
/// For each data line: column 1 is the chromosome, column 2 the start position, and column 8 (INFO) is
/// scanned for "CN=" (copy number, rounded to an integer with X.5 rounded up) and "END=" (end position).
/// If both a FORMAT column (9) and a sample column (10) are present, a "CN" entry there overrides the INFO value.
/// Records that are too short, have no END, or have no CN are echoed to the console as bogus and not stored.
/// Empty lines and lines starting with '#' are skipped.
/// </summary>
/// <param name="oracleVCFPath">Path of the VCF file to read.</param>
protected void LoadKnownCNVCF(string oracleVCFPath)
{
    // Hard-coded switch (currently off): when true, every "chr" substring is removed from chromosome names.
    bool stripChr = false;
    // The file is machine-readable data: never parse it with the user's UI culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load our "oracle" of known copy numbers:
    this.KnownCN = new Dictionary<string, List<CNInterval>>();
    int count = 0;
    using (GzipReader reader = new GzipReader(oracleVCFPath))
    {
        while (true)
        {
            string fileLine = reader.ReadLine();
            if (fileLine == null)
            {
                break;
            }
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }
            string[] bits = fileLine.Split('\t');
            if (bits.Length == 1 && bits[0].Trim().Length == 0)
            {
                continue; // skip empty lines!
            }

            // The INFO column (index 7) is mandatory for us; anything shorter cannot be interpreted.
            if (bits.Length < 8)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
                continue;
            }

            string chromosome = bits[0];
            if (stripChr)
            {
                chromosome = chromosome.Replace("chr", "");
            }
            List<CNInterval> intervals;
            if (!KnownCN.TryGetValue(chromosome, out intervals))
            {
                intervals = new List<CNInterval>();
                KnownCN[chromosome] = intervals;
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.CN = -1; // sentinel: no copy number found yet

            string[] infoBits = bits[7].Split(';');
            foreach (string subBit in infoBits)
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCN = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                    }
                    else
                    {
                        interval.CN = (int)Math.Round(tempCN); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }

            // Parse CN from Canvas output. Both the FORMAT column (index 8) and the sample column (index 9)
            // must exist; a record with FORMAT but no sample column is left with its INFO-derived CN.
            if (bits.Length > 9)
            {
                string[] formatKeys = bits[8].Split(':');
                string[] sampleValues = bits[9].Split(':');
                // The sample column may carry fewer sub-fields than FORMAT declares; never index past it.
                int pairedFields = Math.Min(formatKeys.Length, sampleValues.Length);
                for (int fieldIndex = 0; fieldIndex < pairedFields; fieldIndex++)
                {
                    if (formatKeys[fieldIndex] == "CN")
                    {
                        interval.CN = int.Parse(sampleValues[fieldIndex], invariant);
                    }
                }
            }

            if (interval.End == 0 || interval.CN < 0)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
            }
            else
            {
                intervals.Add(interval);
                count++;
            }
        }
    }
    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}
/// <summary>
/// Developer debug method:
/// - Makes a working copy (Start, End, CN) of every known-CN (truth) interval held by the CN oracle.
/// - Splits those copies wherever a called segment's begin or end falls inside a truth interval, so the truth
///   set ends up segmented at least as finely as the called segments.
///   (Note: segments - or parts thereof - called in areas not covered by the truth set are intentionally ignored.)
/// - Sorts each chromosome's intervals by start position.
/// - For each QScore method, passes the resegmented truth set to GenerateReportAndRocDataForQscoreMethod,
///   which is expected to produce the extended report and ROC output.
/// </summary>
private void GenerateExtendedReportVersusKnownCN()
{
    Dictionary<string, List<CNInterval>> truthByChromosome = new Dictionary<string, List<CNInterval>>();

    // Clone the oracle's intervals so the splitting below never touches the originals.
    foreach (KeyValuePair<string, List<CNInterval>> oracleEntry in this.CNOracle.KnownCN)
    {
        List<CNInterval> clones = new List<CNInterval>();
        truthByChromosome[oracleEntry.Key] = clones;
        foreach (CNInterval source in oracleEntry.Value)
        {
            CNInterval clone = new CNInterval();
            clone.Start = source.Start;
            clone.End = source.End;
            clone.CN = source.CN;
            clones.Add(clone);
        }
    }

    // Cut each truth interval at the breakpoints of the segments that overlap it.
    foreach (CanvasSegment segment in this.Segments)
    {
        List<CNInterval> truthIntervals;
        if (!truthByChromosome.TryGetValue(segment.Chr, out truthIntervals))
        {
            continue;
        }

        // Explicit index rather than foreach: pieces split off below are appended to the very list being
        // walked, and Count is re-read on every pass so those appended pieces are visited as well.
        int position = 0;
        while (position < truthIntervals.Count)
        {
            CNInterval truth = truthIntervals[position];
            position++;

            bool exactMatch = truth.Start == segment.Begin && truth.End == segment.End;
            if (exactMatch)
            {
                break; // perfect segment-knownCN match
            }

            bool overlapsSegment = truth.Start < segment.End && truth.End > segment.Begin;
            if (!overlapsSegment)
            {
                continue; // segment completely outside this knownCN
            }

            // Part of the truth interval lies before the segment: split it off as its own interval.
            if (truth.Start < segment.Begin)
            {
                CNInterval leadingPiece = new CNInterval();
                leadingPiece.Start = truth.Start;
                leadingPiece.End = segment.Begin;
                leadingPiece.CN = truth.CN;
                truth.Start = leadingPiece.End;
                truthIntervals.Add(leadingPiece);
            }

            // Part of the truth interval lies after the segment: split it off as its own interval.
            if (truth.End > segment.End)
            {
                CNInterval trailingPiece = new CNInterval();
                trailingPiece.Start = segment.End;
                trailingPiece.End = truth.End;
                trailingPiece.CN = truth.CN;
                truth.End = trailingPiece.Start;
                truthIntervals.Add(trailingPiece);
            }
        }
    }

    // Order every chromosome's intervals by start position, just for prettiness.
    foreach (List<CNInterval> chromosomeIntervals in truthByChromosome.Values)
    {
        chromosomeIntervals.Sort((left, right) => left.Start.CompareTo(right.Start));
    }

    // Produce report and ROC data once per available QScore method.
    foreach (CanvasSegment.QScoreMethod qscoreMethod in System.Enum.GetValues(typeof(CanvasSegment.QScoreMethod)))
    {
        GenerateReportAndRocDataForQscoreMethod(qscoreMethod, truthByChromosome);
    }
}