Пример #1
0
        /// <summary>
        /// Developer debug method: ROC curve data generation
        /// - Report all intervals, associated QScores and QScore predictor values to an extended report output file
        /// - Report called (i.e. TP+FP) intervals grouped by QScore
        /// - Generate 2 ROC outputs
        ///   - ROC_intervals: FP vs TP rate, unit=1 interval
        ///   - ROC_bases:     FP vs TP rate, unit=1 base
        ///   (Note: In both cases, we ignore intervals shorter than 1kb as most of them are due to imprecise ends of segments, which we don't want to give any weight to)
        /// All four files are written to <c>this.OutputFolder</c> with the prefix "qscore_{qscoreMethod}_".
        /// </summary>
        /// <param name="qscoreMethod">QScore method handed to <c>CanvasSegment.ComputeQScore</c>; also used in the output file names.</param>
        /// <param name="resegmentedKnownCN">Truth-set copy number intervals, keyed by chromosome name.</param>
        private void GenerateReportAndRocDataForQscoreMethod(CanvasSegment.QScoreMethod qscoreMethod, Dictionary<string, List<CNInterval>> resegmentedKnownCN)
        {
            const int minIntervalLength = 1000; // intervals shorter than 1kb are ignored
            const int missingValue = -1;        // placeholder written for intervals without a matching segment

            // Create map interval->{segment+qscore}, ignoring intervals shorter than 1kb
            Dictionary<CNInterval, Tuple<CanvasSegment, int>> interval2Segment = new Dictionary<CNInterval, Tuple<CanvasSegment, int>>();
            foreach (KeyValuePair<string, List<CNInterval>> chromosomeIntervals in resegmentedKnownCN)
            {
                string chr = chromosomeIntervals.Key;
                foreach (CNInterval interval in chromosomeIntervals.Value)
                {
                    // Short intervals can never be recorded, so skip the segment scan entirely.
                    if (interval.End - interval.Start < minIntervalLength)
                    {
                        continue;
                    }

                    foreach (CanvasSegment segment in this.Segments)
                    {
                        // A segment matches when it shares either boundary with the interval.
                        // If several segments match, the last one scanned is the one kept.
                        if (segment.Chr == chr && (segment.Begin == interval.Start || segment.End == interval.End))
                        {
                            interval2Segment[interval] = new Tuple<CanvasSegment, int>(segment, segment.ComputeQScore(qscoreMethod));
                        }
                    }
                }
            }

            // Classify intervals by QScore: intervalsByQScore[q] holds every interval whose QScore is q.
            // NOTE(review): a negative QScore would make the list indexer throw; assumes ComputeQScore returns >= 0 - confirm.
            List<List<CNInterval>> intervalsByQScore = new List<List<CNInterval>>();
            foreach (KeyValuePair<CNInterval, Tuple<CanvasSegment, int>> entry in interval2Segment)
            {
                int qscore = entry.Value.Item2;
                // Resize list to hold this qscore's entries
                while (qscore >= intervalsByQScore.Count)
                {
                    intervalsByQScore.Add(new List<CNInterval>());
                }
                intervalsByQScore[qscore].Add(entry.Key);
            }

            string filePrefix = "qscore_" + qscoreMethod + "_";
            Array predictorIds = Enum.GetValues(typeof(CanvasSegment.QScorePredictor));

            // Output data as ExtendedCallsVersusKnownCN.txt
            string debugPath = Path.Combine(this.OutputFolder, filePrefix + "ExtendedCallsVersusKnownCN.txt");
            using (StreamWriter writer = new StreamWriter(debugPath))
            {
                writer.Write("#Chr\tBegin\tEnd\tTruthSetCN\tCalledCN\tMajorChromCount\tQScore\tInfo");
                foreach (CanvasSegment.QScorePredictor predictorId in predictorIds)
                {
                    writer.Write("\tPredictor_{0}", predictorId.ToString());
                }
                writer.WriteLine();

                foreach (KeyValuePair<string, List<CNInterval>> chromosomeIntervals in resegmentedKnownCN)
                {
                    string chr = chromosomeIntervals.Key;
                    foreach (CNInterval interval in chromosomeIntervals.Value)
                    {
                        Tuple<CanvasSegment, int> match;
                        if (interval2Segment.TryGetValue(interval, out match))
                        {
                            CanvasSegment segment = match.Item1;
                            int qscore = match.Item2;
                            string info = (interval.CN == segment.CopyNumber) ? "Correct" : "Incorrect";
                            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", chr, interval.Start, interval.End, interval.CN, segment.CopyNumber, segment.MajorChromosomeCount, qscore, info);
                            foreach (CanvasSegment.QScorePredictor predictorId in predictorIds)
                            {
                                writer.Write("\t{0}", segment.GetQScorePredictor(predictorId));
                            }
                            writer.WriteLine();
                        }
                        else
                        {
                            // No segment was recorded for this interval (no boundary match, or shorter than 1kb).
                            // Placeholders fill CalledCN, MajorChromCount and QScore so that "Missing"
                            // lands in the Info column, matching the header. Predictor columns are left out.
                            writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", chr, interval.Start, interval.End, interval.CN, missingValue, missingValue, missingValue, "Missing");
                        }
                    }
                }
            }

            // Output data by QScore
            debugPath = Path.Combine(this.OutputFolder, filePrefix + "cnaPerQscore.txt");
            using (StreamWriter writer = new StreamWriter(debugPath))
            {
                // Header lists exactly the eight fields written per row below.
                writer.WriteLine("#Chr\tBegin\tEnd\tTruthSetCN\tCalledCN\tMajorChromCount\tQScore\tInfo");
                for (int qscore = 0; qscore < intervalsByQScore.Count; qscore++)
                {
                    foreach (CNInterval interval in intervalsByQScore[qscore])
                    {
                        CanvasSegment segment = interval2Segment[interval].Item1;
                        string info = (interval.CN == segment.CopyNumber) ? "Correct" : "Incorrect";
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", segment.Chr, interval.Start, interval.End, interval.CN, segment.CopyNumber, segment.MajorChromosomeCount, qscore, info);
                    }
                }
            }

            // ROC output per interval
            debugPath = Path.Combine(this.OutputFolder, filePrefix + "ROC_intervals.txt");
            GenerateRocOutput(debugPath, intervalsByQScore, interval2Segment, false, false);

            // ROC output per base
            debugPath = Path.Combine(this.OutputFolder, filePrefix + "ROC_bases.txt");
            GenerateRocOutput(debugPath, intervalsByQScore, interval2Segment, true, false);
        }