Exemplo n.º 1
0
 /// <summary>
 /// Load known copy-number (CN) data from a tab-delimited .bed file into <c>KnownCN</c>,
 /// replacing whatever was loaded before.  File lines have fields:
 /// chromosome, start, end, chromcountA, chromcountB
 /// The copy number stored for each interval is the sum of the last 2 fields.  (The major
 /// chromosome count would be the max of the last 2 fields; this method does not store it.)
 /// Empty lines, whitespace-only lines and lines starting with '#' are skipped.
 /// </summary>
 /// <param name="oracleBedPath">Path of the .bed file to read.</param>
 /// <exception cref="FormatException">
 /// A data line has fewer than 5 tab-separated fields, or one of its numeric fields is not a valid integer.
 /// </exception>
 protected void LoadKnownCNBed(string oracleBedPath)
 {
     // Developer toggle (always false here): when true, every "chr" substring is removed from chromosome names.
     bool stripChr = false;
     // File contents are machine-formatted, so numbers are parsed independently of the current culture.
     System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
     int count = 0;
     this.KnownCN = new Dictionary<string, List<CNInterval>>();
     using (StreamReader reader = new StreamReader(oracleBedPath))
     {
         string fileLine;
         while ((fileLine = reader.ReadLine()) != null)
         {
             // Skip blank / whitespace-only lines and comment lines.
             if (fileLine.Trim().Length == 0 || fileLine[0] == '#') continue;

             string[] bits = fileLine.Split('\t');
             if (bits.Length < 5)
             {
                 // Fail with the offending line instead of an anonymous IndexOutOfRangeException.
                 throw new FormatException(string.Format(invariant,
                     "Expected at least 5 tab-separated fields in '{0}' but found {1}: {2}",
                     oracleBedPath, bits.Length, fileLine));
             }

             string chromosome = bits[0];
             if (stripChr) chromosome = chromosome.Replace("chr", "");

             // Single lookup: fetch the chromosome's interval list, creating it on first use.
             List<CNInterval> intervals;
             if (!KnownCN.TryGetValue(chromosome, out intervals))
             {
                 intervals = new List<CNInterval>();
                 KnownCN[chromosome] = intervals;
             }

             CNInterval interval = new CNInterval();
             interval.Start = int.Parse(bits[1], invariant);
             interval.End = int.Parse(bits[2], invariant);
             // Total copy number = count of chromosome A + count of chromosome B.
             interval.CN = int.Parse(bits[3], invariant) + int.Parse(bits[4], invariant);
             intervals.Add(interval);
             count++;
         }
     }
     Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
 }
Exemplo n.º 2
0
        /// <summary>
        /// Load known copy-number (CN) data from a tab-delimited .bed file into <c>KnownCN</c>,
        /// replacing whatever was loaded before.  File lines have fields:
        /// chromosome, start, end, chromcountA, chromcountB [, heterogeneity]
        /// The copy number stored for each interval is the sum of chromcountA and chromcountB.  (The major
        /// chromosome count would be the max of those 2 fields; this method does not store it.)
        /// When the optional 6th field is absent, the interval's Heterogeneity is set to -1.0.
        /// Empty lines, whitespace-only lines and lines starting with '#' are skipped.
        /// </summary>
        /// <param name="oracleBedPath">Path of the .bed file to read.</param>
        /// <exception cref="FormatException">
        /// A data line has fewer than 5 tab-separated fields, or one of its numeric fields cannot be parsed.
        /// </exception>
        protected void LoadKnownCNBed(string oracleBedPath)
        {
            // Developer toggle (always false here): when true, every "chr" substring is removed from chromosome names.
            bool stripChr = false;
            // File contents are machine-formatted: parse with the invariant culture so that a value such as
            // "0.5" is read the same way regardless of the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            int count = 0;

            this.KnownCN = new Dictionary<string, List<CNInterval>>();
            using (FileStream stream = new FileStream(oracleBedPath, FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(stream))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    // Skip blank / whitespace-only lines and comment lines.
                    if (fileLine.Trim().Length == 0 || fileLine[0] == '#')
                    {
                        continue;
                    }

                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < 5)
                    {
                        // Fail with the offending line instead of an anonymous IndexOutOfRangeException.
                        throw new FormatException(string.Format(invariant,
                            "Expected at least 5 tab-separated fields in '{0}' but found {1}: {2}",
                            oracleBedPath, bits.Length, fileLine));
                    }

                    string chromosome = bits[0];
                    if (stripChr)
                    {
                        chromosome = chromosome.Replace("chr", "");
                    }

                    // Single lookup: fetch the chromosome's interval list, creating it on first use.
                    List<CNInterval> intervals;
                    if (!KnownCN.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List<CNInterval>();
                        KnownCN[chromosome] = intervals;
                    }

                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1], invariant);
                    interval.End = int.Parse(bits[2], invariant);
                    // Total copy number = count of chromosome A + count of chromosome B.
                    interval.CN = int.Parse(bits[3], invariant) + int.Parse(bits[4], invariant);
                    // Optional 6th column; -1.0 marks "not provided".
                    if (bits.Length > 5)
                    {
                        interval.Heterogeneity = double.Parse(bits[5], invariant);
                    }
                    else
                    {
                        interval.Heterogeneity = -1.0;
                    }
                    intervals.Add(interval);
                    count++;
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Load known copy-number (CN) data from a VCF file, read through <c>GzipReader</c>, into
        /// <c>KnownCN</c>, replacing whatever was loaded before.
        /// For every data line: the interval start is column 2, the end is taken from the "END=" entry of
        /// the INFO column (column 8), and the copy number from the INFO "CN=" entry (rounded to an integer,
        /// with values written as X.5 rounded up).  If FORMAT and sample columns (9 and 10) are present and
        /// FORMAT contains a "CN" key, the matching sample value overrides the INFO copy number.
        /// Lines that are empty or start with '#' are skipped.  Records with fewer than 8 columns, without
        /// an END, or without a non-negative CN are reported on the console and not loaded.
        /// </summary>
        /// <param name="oracleVCFPath">Path of the VCF file to read.</param>
        protected void LoadKnownCNVCF(string oracleVCFPath)
        {
            // Developer toggle (always false here): when true, every "chr" substring is removed from chromosome names.
            bool stripChr = false;
            // VCF numbers are machine-formatted ("2.5"), so parse independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Load our "oracle" of known copy numbers:
            this.KnownCN = new Dictionary<string, List<CNInterval>>();
            int count = 0;

            using (GzipReader reader = new GzipReader(oracleVCFPath))
            {
                while (true)
                {
                    string fileLine = reader.ReadLine();
                    if (fileLine == null)
                    {
                        break;
                    }
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue;
                    }
                    string[] bits = fileLine.Split('\t');
                    if (bits.Length == 1 && bits[0].Trim().Length == 0)
                    {
                        continue; // skip empty lines!
                    }
                    if (bits.Length < 8)
                    {
                        // Too few columns to contain both POS (bits[1]) and INFO (bits[7]):
                        // report it like any other unusable record instead of crashing on the index.
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                        continue;
                    }
                    string chromosome = bits[0];
                    if (stripChr)
                    {
                        chromosome = chromosome.Replace("chr", "");
                    }
                    // Single lookup: fetch the chromosome's interval list, creating it on first use.
                    List<CNInterval> intervals;
                    if (!KnownCN.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List<CNInterval>();
                        KnownCN[chromosome] = intervals;
                    }
                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1], invariant);
                    interval.CN = -1; // sentinel: "no copy number found yet"
                    string[] infoBits = bits[7].Split(';');
                    foreach (string subBit in infoBits)
                    {
                        if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                        {
                            float tempCN = float.Parse(subBit.Substring(3), invariant);
                            if (subBit.EndsWith(".5", StringComparison.Ordinal))
                            {
                                // Math.Round uses banker's rounding by default; nudge X.5 so it rounds up to X+1
                                interval.CN = (int)Math.Round(tempCN + 0.1);
                            }
                            else
                            {
                                interval.CN = (int)Math.Round(tempCN); // Round off
                            }
                        }
                        if (subBit.StartsWith("END=", StringComparison.Ordinal))
                        {
                            interval.End = int.Parse(subBit.Substring(4), invariant);
                        }
                    }
                    // Parse CN from Canvas output.  Both the FORMAT column (bits[8]) and the first
                    // sample column (bits[9]) are needed, hence at least 10 columns.
                    if (bits.Length > 9)
                    {
                        string[] formatKeys = bits[8].Split(':');
                        string[] sampleValues = bits[9].Split(':');
                        // The sample column may carry fewer sub-fields than FORMAT (VCF lets trailing
                        // ones be dropped), so only walk the positions that exist in both.
                        int pairCount = Math.Min(formatKeys.Length, sampleValues.Length);
                        for (int pairIndex = 0; pairIndex < pairCount; pairIndex++)
                        {
                            if (formatKeys[pairIndex] == "CN")
                            {
                                interval.CN = int.Parse(sampleValues[pairIndex], invariant);
                            }
                        }
                    }
                    if (interval.End == 0 || interval.CN < 0)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                    }
                    else
                    {
                        intervals.Add(interval);
                        count++;
                    }
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Load known copy-number (CN) data from a VCF file, read through <c>GzipReader</c>, into
        /// <c>KnownCN</c>, replacing whatever was loaded before.
        /// For every data line: the interval start is column 2, the end is taken from the "END=" entry of
        /// the INFO column (column 8), and the copy number from the INFO "CN=" entry (rounded to an integer,
        /// with values written as X.5 rounded up).  If FORMAT and sample columns (9 and 10) are present and
        /// FORMAT contains a "CN" key, the matching sample value overrides the INFO copy number.
        /// Lines that are empty or start with '#' are skipped.  Records with fewer than 8 columns, without
        /// an END, or without a non-negative CN are reported on the console and not loaded.
        /// </summary>
        /// <param name="oracleVCFPath">Path of the VCF file to read.</param>
        protected void LoadKnownCNVCF(string oracleVCFPath)
        {
            // Developer toggle (always false here): when true, every "chr" substring is removed from chromosome names.
            bool stripChr = false;
            // VCF numbers are machine-formatted ("2.5"), so parse independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Load our "oracle" of known copy numbers:
            this.KnownCN = new Dictionary<string, List<CNInterval>>();
            int count = 0;
            using (GzipReader reader = new GzipReader(oracleVCFPath))
            {
                while (true)
                {
                    string fileLine = reader.ReadLine();
                    if (fileLine == null) break;
                    if (fileLine.Length == 0 || fileLine[0] == '#') continue;
                    string[] bits = fileLine.Split('\t');
                    if (bits.Length == 1 && bits[0].Trim().Length == 0) continue; // skip empty lines!
                    if (bits.Length < 8)
                    {
                        // Too few columns to contain both POS (bits[1]) and INFO (bits[7]):
                        // report it like any other unusable record instead of crashing on the index.
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                        continue;
                    }
                    string chromosome = bits[0];
                    if (stripChr) chromosome = chromosome.Replace("chr", "");
                    // Single lookup: fetch the chromosome's interval list, creating it on first use.
                    List<CNInterval> intervals;
                    if (!KnownCN.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List<CNInterval>();
                        KnownCN[chromosome] = intervals;
                    }
                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1], invariant);
                    interval.CN = -1; // sentinel: "no copy number found yet"
                    string[] infoBits = bits[7].Split(';');
                    foreach (string subBit in infoBits)
                    {
                        if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                        {
                            float tempCN = float.Parse(subBit.Substring(3), invariant);
                            if (subBit.EndsWith(".5", StringComparison.Ordinal))
                            {
                                // Math.Round uses banker's rounding by default; nudge X.5 so it rounds up to X+1
                                interval.CN = (int)Math.Round(tempCN + 0.1);
                            }
                            else
                            {
                                interval.CN = (int)Math.Round(tempCN); // Round off
                            }
                        }
                        if (subBit.StartsWith("END=", StringComparison.Ordinal))
                        {
                            interval.End = int.Parse(subBit.Substring(4), invariant);
                        }
                    }
                    // Parse CN from Canvas output.  Both the FORMAT column (bits[8]) and the first
                    // sample column (bits[9]) are needed, hence at least 10 columns.
                    if (bits.Length > 9)
                    {
                        string[] formatKeys = bits[8].Split(':');
                        string[] sampleValues = bits[9].Split(':');
                        // The sample column may carry fewer sub-fields than FORMAT (VCF lets trailing
                        // ones be dropped), so only walk the positions that exist in both.
                        int pairCount = Math.Min(formatKeys.Length, sampleValues.Length);
                        for (int pairIndex = 0; pairIndex < pairCount; pairIndex++)
                        {
                            if (formatKeys[pairIndex] == "CN")
                            {
                                interval.CN = int.Parse(sampleValues[pairIndex], invariant);
                            }
                        }
                    }
                    if (interval.End == 0 || interval.CN < 0)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                    }
                    else
                    {
                        intervals.Add(interval);
                        count++;
                    }
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Developer debug method.
        /// Builds a private copy of the oracle's known-CN ("truth") intervals, cuts those intervals at the
        /// breakpoints of the called segments so the truth set is at least as finely segmented as the calls,
        /// and then invokes GenerateReportAndRocDataForQscoreMethod once per QScore method with the
        /// resegmented truth set.
        /// (Note: segments - or parts thereof - called in areas not covered by the truth set are
        /// intentionally ignored.)
        /// </summary>
        private void GenerateExtendedReportVersusKnownCN()
        {
            Dictionary<string, List<CNInterval>> resegmentedKnownCN = new Dictionary<string, List<CNInterval>>();

            // Work on copies (Start, End and CN only) rather than on the oracle's own interval objects
            foreach (string chromosome in this.CNOracle.KnownCN.Keys)
            {
                List<CNInterval> copies = new List<CNInterval>();
                foreach (CNInterval source in this.CNOracle.KnownCN[chromosome])
                {
                    copies.Add(new CNInterval { Start = source.Start, End = source.End, CN = source.CN });
                }
                resegmentedKnownCN[chromosome] = copies;
            }

            // Cut the truth intervals wherever a called segment begins or ends inside them
            foreach (CanvasSegment segment in this.Segments)
            {
                List<CNInterval> truthIntervals;
                if (!resegmentedKnownCN.TryGetValue(segment.Chr, out truthIntervals))
                {
                    continue; // no truth data for this chromosome
                }

                // Index-based loop: pieces split off below are appended to the list while we walk it
                for (int index = 0; index < truthIntervals.Count; index++)
                {
                    CNInterval truth = truthIntervals[index];

                    bool exactMatch = truth.Start == segment.Begin && truth.End == segment.End;
                    if (exactMatch)
                    {
                        break; // segment coincides with a truth interval: nothing to split for this segment
                    }

                    bool overlaps = truth.Start < segment.End && truth.End > segment.Begin;
                    if (!overlaps)
                    {
                        continue; // segment lies entirely outside this truth interval
                    }

                    // Truth interval starts before the segment: split off the leading part
                    if (truth.Start < segment.Begin)
                    {
                        CNInterval leading = new CNInterval { Start = truth.Start, End = segment.Begin, CN = truth.CN };
                        truth.Start = leading.End;
                        truthIntervals.Add(leading);
                    }

                    // Truth interval extends past the segment: split off the trailing part
                    if (truth.End > segment.End)
                    {
                        CNInterval trailing = new CNInterval { Start = segment.End, End = truth.End, CN = truth.CN };
                        truth.End = trailing.Start;
                        truthIntervals.Add(trailing);
                    }
                }
            }

            // Order each chromosome's intervals by start position (cosmetic only)
            foreach (List<CNInterval> intervals in resegmentedKnownCN.Values)
            {
                intervals.Sort((left, right) => left.Start.CompareTo(right.Start));
            }

            // One report / ROC pass per available QScore method
            foreach (CanvasSegment.QScoreMethod method in CanvasSegment.QScoreMethod.GetValues(typeof(CanvasSegment.QScoreMethod)))
            {
                GenerateReportAndRocDataForQscoreMethod(method, resegmentedKnownCN);
            }
        }