Contains information about a genomic interval. Has functions for computing copy numbers and their likelihoods.
Esempio n. 1
0
 /// <summary>
 /// Builds 100 synthetic chr1 segments that cycle through four shapes (long or short, crossed with
 /// many or few variant frequencies) and asserts how many of them GetUsableSegmentsForModeling returns.
 /// </summary>
 public void TestUsableSegments()
 {
     List<CanvasSegment> segments = new List<CanvasSegment>();
     int currentPosition = 1000;
     // Fixed seed: the synthetic counts and variant frequencies are identical on every run,
     // so any failure of this test can be reproduced exactly.
     Random RNG = new Random(12345);
     // Generate some segments.  Alternate between:
     // - Usable
     // - Too short
     // - Too few variants
     // - Too short + too few variants
     for (int index = 0; index < 100; index++)
     {
         // Odd indices get the short length.
         int length = (index % 2 == 1) ? 2000 : 100000;
         // The last two indices of every group of four get the low variant count.
         int variantCount = (index % 4 > 1) ? 25 : 999;

         // One random count per 100 bases of segment length.
         List<float> counts = new List<float>();
         for (int countIndex = 0; countIndex < length / 100; countIndex++)
         {
             counts.Add(RNG.Next(1000));
         }

         // NOTE(review): currentPosition is never advanced, so every segment starts at 1000 - confirm this is intended.
         CanvasSegment segment = new CanvasSegment("chr1", currentPosition, currentPosition + length, counts);
         for (int varIndex = 0; varIndex < variantCount; varIndex++)
         {
             segment.VariantFrequencies.Add(RNG.Next());
         }
         segments.Add(segment);
     }

     var usable = CanvasSomaticCaller.SomaticCaller.GetUsableSegmentsForModeling(segments, false, 50);
     Assert.AreEqual(50, usable.Count);
 }
Esempio n. 2
0
        /// <summary>
        /// Returns the reference (expected) copy number for a segment: the ploidy value that accounts for the
        /// most bases of the segment.  Every base starts out as ploidy 2; bases overlapping a non-diploid interval
        /// of PloidyByChromosome are moved to that interval's ploidy.  If the segment's chromosome has no entry in
        /// PloidyByChromosome, or no ploidy has a positive base count, the result is 2.
        /// </summary>
        public int GetReferenceCopyNumber(CanvasSegment segment)
        {
            if (!PloidyByChromosome.ContainsKey(segment.Chr))
            {
                return 2;
            }

            // basesPerPloidy[p] holds the number of segment bases assigned ploidy p, for p in 0..4.
            int[] basesPerPloidy = new int[5];
            basesPerPloidy[2] = segment.End - segment.Begin;

            foreach (PloidyInterval region in this.PloidyByChromosome[segment.Chr])
            {
                // Diploid regions leave the tally untouched.
                if (region.Ploidy == 2)
                {
                    continue;
                }

                int firstShared = Math.Max(segment.Begin, region.Start);
                if (firstShared > segment.End)
                {
                    continue;
                }

                int sharedBases = Math.Min(segment.End, region.End) - firstShared;
                if (sharedBases < 0)
                {
                    continue;
                }

                // Move the shared bases from the diploid bucket to the region's bucket.
                // ASSUMPTION: Bed file ploidy shouldn't be >4 (i.e. we wouldn't handle an XXXXXY genome):
                basesPerPloidy[2] -= sharedBases;
                basesPerPloidy[region.Ploidy] += sharedBases;
            }

            // Pick the ploidy with the strictly largest base count; ties keep the lower ploidy.
            int winner = 2;
            int winnerBases = 0;
            int ploidy = 0;
            foreach (int bases in basesPerPloidy)
            {
                if (bases > winnerBases)
                {
                    winnerBases = bases;
                    winner = ploidy;
                }
                ploidy++;
            }
            return winner;
        }
Esempio n. 3
0
 /// <summary>
 /// Looks up the known copy number for a segment: the CN of the first interval in KnownCN whose overlap
 /// with the segment covers at least half of the segment's length.  Returns -1 when the chromosome is not
 /// present in KnownCN (under any of the tried spellings) or when no interval qualifies.
 /// </summary>
 public int GetKnownCNForSegment(CanvasSegment segment)
 {
     // Try the segment's own chromosome name, then the name with "chr" removed, then with "chr" prepended.
     string key = segment.Chr;
     if (!this.KnownCN.ContainsKey(key))
     {
         key = segment.Chr.Replace("chr", "");
     }
     if (!this.KnownCN.ContainsKey(key))
     {
         key = "chr" + segment.Chr;
     }
     if (!this.KnownCN.ContainsKey(key))
     {
         return -1;
     }

     int segmentLength = segment.End - segment.Begin;
     foreach (CNInterval known in this.KnownCN[key])
     {
         bool disjoint = known.End < segment.Begin || known.Start > segment.End;
         if (disjoint)
         {
             continue;
         }

         int overlap = Math.Min(segment.End, known.End) - Math.Max(segment.Begin, known.Start);
         if (overlap * 2 >= segmentLength)
         {
             return known.CN;
         }
     }
     return -1;
 }
Esempio n. 4
0
 /// <summary>
 /// Absorbs a neighboring segment: this segment's interval grows to span both segments, and the other
 /// segment's counts, variant frequencies and variant total coverage are appended to this segment's lists.
 /// </summary>
 /// <param name="s">Segment to merge in.</param>
 public void MergeIn(CanvasSegment s)
 {
     // Widen the interval to the union of the two segments.
     this.End = (s.End > this.End) ? s.End : this.End;
     this.Begin = (s.Begin < this.Begin) ? s.Begin : this.Begin;

     // Append the per-bin and per-variant data of the absorbed segment.
     this.Counts.AddRange(s.Counts);
     this.VariantFrequencies.AddRange(s.VariantFrequencies);
     this.VariantTotalCoverage.AddRange(s.VariantTotalCoverage);
 }
Esempio n. 5
0
        /// <summary>
        /// Folds a neighboring segment into this one.  The resulting interval is the smallest one containing
        /// both segments; counts, variant frequencies and variant total coverage of <paramref name="s"/> are
        /// appended after this segment's own values.
        /// </summary>
        /// <param name="s">Segment to merge in.</param>
        public void MergeIn(CanvasSegment s)
        {
            int unionEnd = Math.Max(End, s.End);
            int unionBegin = Math.Min(Begin, s.Begin);
            End = unionEnd;
            Begin = unionBegin;

            Counts.AddRange(s.Counts);
            VariantFrequencies.AddRange(s.VariantFrequencies);
            VariantTotalCoverage.AddRange(s.VariantTotalCoverage);
        }
Esempio n. 6
0
        /// <summary>
        /// Builds a CanvasSegment from a run of bins: chromosome and start come from the first bin, the end
        /// from the last bin, and the supplied confidence intervals are attached to the new segment.
        /// </summary>
        /// <param name="bins">Bins making up the segment; the first and last element define its extent.</param>
        /// <param name="startConfidenceInterval">Value stored in the segment's StartConfidenceInterval.</param>
        /// <param name="endConfidenceInterval">Value stored in the segment's EndConfidenceInterval.</param>
        /// <returns>The newly created segment.</returns>
        private static CanvasSegment CreateSegment(List <SampleGenomicBin> bins, Tuple <int, int> startConfidenceInterval, Tuple <int, int> endConfidenceInterval)
        {
            var leadingBin = bins.First().GenomicBin;
            var trailingBin = bins.Last().GenomicBin;

            var created = new CanvasSegment(leadingBin.Chromosome, leadingBin.Interval.Start, trailingBin.Interval.End, bins);
            created.StartConfidenceInterval = startConfidenceInterval;
            created.EndConfidenceInterval = endConfidenceInterval;
            return created;
        }
Esempio n. 7
0
 /// <summary>
 /// Builds ten consecutive 1 Mb segments on chr10 that all carry the same five counts (one of them a low
 /// outlier) and checks that CanvasSegment.ExpectedCount returns 78 for the set.
 /// </summary>
 public void TestSegmentStats()
 {
     List<float> counts = new List<float>() { 80, 79, 78, 77, 2 };
     List<CanvasSegment> segments = new List<CanvasSegment>();
     for (int index = 0; index < 10; index++)
     {
         segments.Add(new CanvasSegment("chr10", 1000000 * index, 1000000 * (index + 1), counts));
     }

     double actualCount = CanvasSegment.ExpectedCount(segments);

     // Expected value goes first so that a failure message reports expected/actual the right way round.
     Assert.AreEqual(78, actualCount, 0.01);
 }
Esempio n. 8
0
        /// <summary>
        /// Writes the non-sample VCF columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO) for a single
        /// CanvasSegment.  Nothing is written after the INFO column (no trailing tab or newline).
        /// </summary>
        /// <param name="writer">Destination for the VCF text.</param>
        /// <param name="segment">Segment to describe.</param>
        /// <param name="cnvType">CNV type of the record; supplies the ALT allele, the ID component and SVTYPE.</param>
        /// <param name="denovoQualityThreshold">When set and the segment's DQScore is at least this value, a "dq{threshold}" INFO entry is written.</param>
        /// <param name="isMultisample">When true, QUAL is written as "." and the DQ INFO entry is omitted.</param>
        private static void WriteInfoField(BgzipOrStreamWriter writer, CanvasSegment segment, CnvType cnvType, int?denovoQualityThreshold, bool isMultisample)
        {
            // VCF is a machine-readable format: numbers must use '.' as the decimal separator regardless of
            // the culture of the machine that runs the caller.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // From vcf 4.1 spec:
            //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String "<ID>") then the padding base is required and POS denotes the
            //     coordinate of the base preceding the polymorphism.
            string alternateAllele = cnvType.ToAltId();
            int    position        = (alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">"))
                ? segment.Begin
                : segment.Begin + 1;

            writer.Write($"{segment.Chr}\t{position}\tCanvas:{cnvType.ToVcfId()}:{segment.Chr}:{segment.Begin + 1}-{segment.End}\t");

            string qScore = isMultisample ? "." : string.Format(invariant, "{0:F2}", segment.QScore);
            writer.Write($"N\t{alternateAllele}\t{qScore}\t{segment.Filter}\t");

            if (cnvType != CnvType.Reference)
            {
                writer.Write($"SVTYPE={cnvType.ToSvType()};");
            }

            if (segment.IsHeterogeneous)
            {
                writer.Write("SUBCLONAL;");
            }

            if (segment.DQScore.HasValue && !isMultisample)
            {
                writer.Write(string.Format(invariant, "DQ={0};", segment.DQScore.Value));
            }

            // Short-circuit operators: the comparison is only evaluated when both values are present.
            if (denovoQualityThreshold.HasValue && segment.DQScore.HasValue && segment.DQScore >= denovoQualityThreshold)
            {
                writer.Write($"dq{denovoQualityThreshold};");
            }
            writer.Write($"END={segment.End}");

            if (cnvType != CnvType.Reference)
            {
                writer.Write($";CNVLEN={segment.End - segment.Begin}");
            }

            if (segment.StartConfidenceInterval != null)
            {
                writer.Write($";CIPOS={segment.StartConfidenceInterval.Item1},{segment.StartConfidenceInterval.Item2}");
            }
            if (segment.EndConfidenceInterval != null)
            {
                writer.Write($";CIEND={segment.EndConfidenceInterval.Item1},{segment.EndConfidenceInterval.Item2}");
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Constructs several segments, invokes CanvasSegment.MergeSegments, and checks the number of segments
        /// left on each chromosome, i.e. that the expected merges (and no others) occurred.
        /// </summary>
        public void TestMergeSegments()
        {
            List<CanvasSegment> allSegments = new List<CanvasSegment>();
            List<float> counts = new List<float>();

            // Chr1 gets five segments and we should merge to three:
            CanvasSegment seg = new CanvasSegment("chr1", 1000000, 2000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr1", 2000000, 2000100, counts);
            seg.CopyNumber = 3;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr1", 2000100, 3000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr1", 3000000, 3100000, counts);
            seg.CopyNumber = 3;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr1", 3100000, 4000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);

            // Chr2 gets segments with a large gap between, so can't merge:
            seg = new CanvasSegment("chr2", 1000000, 2000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr2", 3000000, 3000100, counts);
            seg.CopyNumber = 3;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr2", 4000000, 5000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);

            // Chr3 has three segments that all merge to 1 big one:
            seg = new CanvasSegment("chr3", 1000000, 2000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr3", 2000000, 3000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);
            seg = new CanvasSegment("chr3", 3000000, 4000000, counts);
            seg.CopyNumber = 2;
            allSegments.Add(seg);

            CanvasSegment.MergeSegments(ref allSegments, 50000, 10000);
            Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(allSegments);

            // Expected value first, actual second, so failure messages are reported the right way round.
            Assert.AreEqual(3, segmentsByChromosome["chr1"].Count);
            Assert.AreEqual(3, segmentsByChromosome["chr2"].Count);
            Assert.AreEqual(1, segmentsByChromosome["chr3"].Count);
        }
Esempio n. 10
0
        /// <summary>
        /// Loads in data produced by CanvasPartition.exe.  Each input row is tab-separated with the chromosome
        /// in column 0, the bin start in column 1, the bin end in column 2, the bin count in column 3 and a
        /// segment id in column 4.  Consecutive rows sharing the same segment id form one segment.
        /// </summary>
        /// <param name="infile">Input file.</param>
        /// <returns>A list of segments.</returns>
        public static List <CanvasSegment> ReadSegments(string infile)
        {
            Console.WriteLine("{0} Read segments from {1}", DateTime.Now, infile);
            List <CanvasSegment> segments = new List <CanvasSegment>();

            string       chr    = null;
            int          begin  = -1;
            int          end    = -1;
            int          bin    = -1; // id of the segment currently being accumulated; -1 until the first row is read
            List <float> counts = new List <float>();

            using (GzipReader reader = new GzipReader(infile))
            {
                string row = null;

                while ((row = reader.ReadLine()) != null)
                {
                    string[] fields = row.Split('\t');

                    int currentBin = Convert.ToInt32(fields[4]);

                    // We've moved to a new segment
                    if (currentBin != bin)
                    {
                        // Make a segment
                        if (bin != -1)
                        {
                            segments.Add(new CanvasSegment(chr, begin, end, counts));
                            // Start a fresh list rather than clearing the old one, so the finished segment
                            // cannot lose its counts if it kept a reference to the list it was given.
                            counts = new List <float>();
                        }

                        chr   = fields[0];
                        begin = Convert.ToInt32(fields[1]);
                        bin   = currentBin;
                    }

                    end = Convert.ToInt32(fields[2]);
                    // The file is machine-generated: parse with the invariant culture so that '.' is always
                    // the decimal separator, whatever the locale of the machine.
                    counts.Add(float.Parse(fields[3], System.Globalization.CultureInfo.InvariantCulture));
                }

                if (bin != -1)
                {
                    // Add the last segment
                    segments.Add(new CanvasSegment(chr, begin, end, counts));
                }
            }
            Console.WriteLine("{0} Loaded {1} segments", DateTime.Now, segments.Count);
            return(segments);
        }
Esempio n. 11
0
 /// <summary>
 /// Writes the FORMAT column and the single sample column for a segment, then terminates the line.
 /// The fields are RC (mean count rounded to an integer, midpoints away from zero), BC (bin count) and
 /// CN (copy number), followed by MCC (major chromosome count) when the segment has one.
 /// </summary>
 /// <param name="writer">Destination for the VCF text.</param>
 /// <param name="segment">Segment whose sample values are written.</param>
 private static void WriteSingleSampleInfo(BgzipOrStreamWriter writer, CanvasSegment segment)
 {
     bool hasMajorChromosomeCount = segment.MajorChromosomeCount.HasValue;

     // FORMAT column.
     writer.Write("\tRC:BC:CN");
     if (hasMajorChromosomeCount)
     {
         writer.Write(":MCC");
     }

     // Sample column, in the same field order as the FORMAT column.
     writer.Write("\t{0}:{1}:{2}", Math.Round(segment.MeanCount, 0, MidpointRounding.AwayFromZero),
                  segment.BinCount, segment.CopyNumber);
     if (hasMajorChromosomeCount)
     {
         writer.Write(":{0}", segment.MajorChromosomeCount);
     }
     writer.WriteLine();
 }
Esempio n. 12
0
 /// <summary>
 /// Absorbs a neighboring segment.  Whenever the other segment reaches further left (or right) than this
 /// one, this segment takes over that boundary together with its confidence interval.  The other segment's
 /// counts and allele data (frequencies and total coverage) are appended to this segment's.
 /// </summary>
 /// <param name="s">Segment to merge in.</param>
 public void MergeIn(CanvasSegment s)
 {
     bool reachesFurtherLeft = s.Begin < this.Begin;
     bool reachesFurtherRight = s.End > this.End;

     if (reachesFurtherLeft)
     {
         // The start boundary and its confidence interval travel together.
         this.StartConfidenceInterval = s.StartConfidenceInterval;
         this.Begin = s.Begin;
     }

     if (reachesFurtherRight)
     {
         // Likewise for the end boundary.
         this.EndConfidenceInterval = s.EndConfidenceInterval;
         this.End = s.End;
     }

     this.Counts.AddRange(s.Counts);
     this.Alleles.Frequencies.AddRange(s.Alleles.Frequencies);
     this.Alleles.TotalCoverage.AddRange(s.Alleles.TotalCoverage);
 }
Esempio n. 13
0
        /// <summary>
        /// Writes the non-sample VCF columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO) of a record,
        /// taking coordinates and flags from the first sample's segment.  Nothing is written after the INFO
        /// column (no trailing tab or newline).
        /// </summary>
        /// <param name="writer">Destination for the VCF text.</param>
        /// <param name="firstSampleSegment">Segment supplying chromosome, coordinates, QScore, flags and confidence intervals.</param>
        /// <param name="sampleSetCnvType">CNV type of the record; supplies the ID component and SVTYPE.</param>
        /// <param name="alternateAllele">Text of the ALT column; a symbolic allele ("&lt;...&gt;") makes POS the base preceding the variant.</param>
        /// <param name="recordLevelFilter">Text of the FILTER column.</param>
        /// <param name="isMultisample">When true, QUAL is written as ".".</param>
        private static void WriteColumnsUntilInfoField(BgzipOrStreamWriter writer, CanvasSegment firstSampleSegment, CnvType sampleSetCnvType, string alternateAllele, string recordLevelFilter, bool isMultisample)
        {
            // From vcf 4.1 spec:
            //     If any of the ALT alleles is a symbolic allele (an angle-bracketed ID String "<ID>") then the padding base is required and POS denotes the
            //     coordinate of the base preceding the polymorphism.
            int position = (alternateAllele.StartsWith("<") && alternateAllele.EndsWith(">"))
                ? firstSampleSegment.Begin
                : firstSampleSegment.Begin + 1;

            writer.Write($"{firstSampleSegment.Chr}\t{position}\tCanvas:{sampleSetCnvType.ToVcfId()}:{firstSampleSegment.Chr}:{firstSampleSegment.Begin + 1}-{firstSampleSegment.End}\t");

            // VCF is machine-readable: format QUAL with the invariant culture so the decimal separator is
            // always '.', whatever the locale of the machine that runs the caller.
            string qScore = isMultisample
                ? "."
                : string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:F2}", firstSampleSegment.QScore);

            writer.Write($"N\t{alternateAllele}\t{qScore}\t{recordLevelFilter}\t");

            if (sampleSetCnvType != CnvType.Reference)
            {
                writer.Write($"SVTYPE={sampleSetCnvType.ToSvType()};");
            }

            if (firstSampleSegment.IsHeterogeneous)
            {
                writer.Write("SUBCLONAL;");
            }

            if (firstSampleSegment.IsCommonCnv)
            {
                writer.Write("COMMONCNV;");
            }

            writer.Write($"END={firstSampleSegment.End}");

            if (sampleSetCnvType != CnvType.Reference)
            {
                writer.Write($";CNVLEN={firstSampleSegment.Length}");
            }

            if (firstSampleSegment.StartConfidenceInterval != null)
            {
                writer.Write($";CIPOS={firstSampleSegment.StartConfidenceInterval.Item1},{firstSampleSegment.StartConfidenceInterval.Item2}");
            }
            if (firstSampleSegment.EndConfidenceInterval != null)
            {
                writer.Write($";CIEND={firstSampleSegment.EndConfidenceInterval.Item1},{firstSampleSegment.EndConfidenceInterval.Item2}");
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Checks the CanvasSegment constructor (Begin, End, BinCount, Chr), the MeanCount property, and that
        /// MergeIn appends the counts of the merged segment and extends End.
        /// </summary>
        public void TestSegment()
        {
            List<float> counts = new List<float>() {100, 90, 110, 100, 95, 105};
            CanvasSegment seg1 = new CanvasSegment("chr17", 100000000, 110000000, counts);

            // Silly constructor tests (expected value first, actual second):
            Assert.AreEqual(100000000, seg1.Begin);
            Assert.AreEqual(110000000, seg1.End);
            Assert.AreEqual(counts.Count, seg1.BinCount);
            Assert.AreEqual("chr17", seg1.Chr);
            // Property test:
            Assert.AreEqual(100, seg1.MeanCount, 0.01);

            // Build a second segment, and merge them, and test results:
            CanvasSegment seg2 = new CanvasSegment("chr17", 110000000, 120000000, counts);
            seg1.MergeIn(seg2);
            Assert.AreEqual(12, seg1.Counts.Count);
            Assert.AreEqual(seg2.End, seg1.End);
        }
Esempio n. 15
0
        /// <summary>
        /// Determines the expected copy number of a segment from the reference ploidy intervals.  All bases of
        /// the segment start as ploidy 2; bases overlapping a non-diploid interval are re-assigned to that
        /// interval's ploidy, and the ploidy holding the most bases wins (the lower ploidy on a tie).  Returns 2
        /// when the chromosome is absent from PloidyByChromosome or when no ploidy has a positive base count.
        /// </summary>
        public int GetReferenceCopyNumber(CanvasSegment segment)
        {
            if (!PloidyByChromosome.ContainsKey(segment.Chr))
            {
                return 2;
            }

            int[] tally = new int[5];
            tally[2] = segment.End - segment.Begin;

            foreach (PloidyInterval interval in this.PloidyByChromosome[segment.Chr])
            {
                if (interval.Ploidy != 2)
                {
                    int from = Math.Max(segment.Begin, interval.Start);
                    if (from <= segment.End)
                    {
                        int to    = Math.Min(segment.End, interval.End);
                        int width = to - from;
                        if (width >= 0)
                        {
                            tally[2] -= width;
                            tally[interval.Ploidy] += width; // ASSUMPTION: Bed file ploidy shouldn't be >4 (i.e. we wouldn't handle an XXXXXY genome):
                        }
                    }
                }
            }

            // Scan from the highest ploidy down; accepting equal counts on the way down means that, among
            // ploidies sharing the largest positive count, the lowest one is the final choice.
            int most   = 0;
            int chosen = 2;
            for (int ploidy = tally.Length - 1; ploidy >= 0; ploidy--)
            {
                if (tally[ploidy] > 0 && tally[ploidy] >= most)
                {
                    most   = tally[ploidy];
                    chosen = ploidy;
                }
            }
            return chosen;
        }
Esempio n. 16
0
        /// <summary>
        /// Determines the expected copy number of a segment: the index with the largest positive entry in the
        /// array returned by getPloidyCounts for the segment's interval (the lowest such index on a tie).
        /// Returns 2 when the chromosome is absent from PloidyByChromosome or when no entry is positive.
        /// </summary>
        public int GetReferenceCopyNumber(CanvasSegment segment)
        {
            if (!PloidyByChromosome.ContainsKey(segment.Chr))
            {
                return 2;
            }

            // The interval handed to getPloidyCounts starts at Begin + 1 and ends at End.
            ReferenceInterval query = new ReferenceInterval(segment.Chr, new Interval(segment.Begin + 1, segment.End));
            int[] ploidyCounts = getPloidyCounts(query);

            int winner      = 2;
            int winnerCount = 0;
            int candidate   = 0;
            while (candidate < ploidyCounts.Length)
            {
                if (ploidyCounts[candidate] > winnerCount)
                {
                    winnerCount = ploidyCounts[candidate];
                    winner      = candidate;
                }
                candidate++;
            }
            return winner;
        }
Esempio n. 17
0
        /// <summary>
        /// Returns the known copy number for a segment: the CN of the first KnownCN interval whose overlap with
        /// the segment spans at least half of the segment.  Returns -1 if the chromosome cannot be found in
        /// KnownCN (as given, with "chr" removed, or with "chr" prepended) or if no interval qualifies.
        /// </summary>
        public int GetKnownCNForSegment(CanvasSegment segment)
        {
            // Handle switched chromosome naming convention transparently:
            string chr = segment.Chr;

            if (!this.KnownCN.ContainsKey(chr))
            {
                string withoutPrefix = segment.Chr.Replace("chr", "");
                string withPrefix    = "chr" + segment.Chr;
                if (this.KnownCN.ContainsKey(withoutPrefix))
                {
                    chr = withoutPrefix;
                }
                else if (this.KnownCN.ContainsKey(withPrefix))
                {
                    chr = withPrefix;
                }
                else
                {
                    return -1;
                }
            }

            int result = -1;

            foreach (CNInterval interval in this.KnownCN[chr])
            {
                // Only intervals that touch the segment are candidates.
                if (interval.End >= segment.Begin && interval.Start <= segment.End)
                {
                    int overlapStart = Math.Max(segment.Begin, interval.Start);
                    int overlapEnd   = Math.Min(segment.End, interval.End);
                    if ((overlapEnd - overlapStart) * 2 >= (segment.End - segment.Begin))
                    {
                        result = interval.CN;
                        break;
                    }
                }
            }
            return result;
        }
Esempio n. 18
0
        /// <summary>
        /// Returns the reference copy number for a region, or <paramref name="defaultPloidy"/> when no
        /// reference ploidy information is available.
        /// </summary>
        /// <param name="referencePloidy">Reference ploidy information; may be null.</param>
        /// <param name="chrom">Chromosome of the region.</param>
        /// <param name="start">Start passed to the temporary CanvasSegment.</param>
        /// <param name="end">End passed to the temporary CanvasSegment.</param>
        /// <param name="defaultPloidy">Value returned when <paramref name="referencePloidy"/> is null.</param>
        private static int GetPloidy(PloidyInfo referencePloidy, string chrom, int start, int end, int defaultPloidy = 2)
        {
            if (referencePloidy != null)
            {
                // A count-less segment is enough: it is only used to carry the coordinates of the query.
                List<float> noCounts = new List<float>();
                CanvasSegment probe = new CanvasSegment(chrom, start, end, noCounts);
                return referencePloidy.GetReferenceCopyNumber(probe);
            }

            return defaultPloidy;
        }
Esempio n. 19
0
        /// <summary>
        /// Parse the outputs of CanvasSNV, and note these variant frequencies in the appropriate segment.
        /// Each usable record adds its variant frequency (alt / (ref + alt)) and its total coverage (ref + alt)
        /// to the segment containing its position.  Records with fewer than six columns, with an unknown
        /// chromosome, with total coverage below 10, or falling outside every segment are skipped.
        /// </summary>
        /// <param name="variantFrequencyFile">Tab-separated input file; lines that are empty or start with '#' are ignored.</param>
        /// <param name="segments">Segments that receive the variant frequencies.</param>
        /// <returns>Mean total coverage over the records that were assigned to a segment, or 0 if there were none.</returns>
        public static float LoadVariantFrequencies(string variantFrequencyFile, List <CanvasSegment> segments)
        {
            Console.WriteLine("{0} Load variant frequencies from {1}", DateTime.Now, variantFrequencyFile);
            Dictionary <string, List <CanvasSegment> > segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(segments);
            Dictionary <string, string> alternativeNames = GetChromosomeAlternativeNames(segmentsByChromosome.Keys);
            // The file is machine-generated, so numbers are parsed independently of the machine's locale.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            long totalCoverage = 0; // summed coverage of the records assigned to a segment
            int  usableRecords = 0; // number of records assigned to a segment

            using (GzipReader reader = new GzipReader(variantFrequencyFile))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue; // Skip headers
                    }
                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < 6)
                    {
                        Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, fileLine);
                        continue;
                    }

                    // Resolve the chromosome with a single lookup; fall back to its alternative name.
                    string chromosome = bits[0];
                    List <CanvasSegment> chrSegments;
                    if (!segmentsByChromosome.TryGetValue(chromosome, out chrSegments))
                    {
                        string alternativeName;
                        if (!alternativeNames.TryGetValue(chromosome, out alternativeName))
                        {
                            continue;
                        }
                        chromosome = alternativeName;
                    }

                    int position = int.Parse(bits[1], invariant); // 1-based (from the input VCF to Canvas SNV)
                    int countRef = int.Parse(bits[4], invariant);
                    int countAlt = int.Parse(bits[5], invariant);
                    int coverage = countRef + countAlt;
                    if (coverage < 10)
                    {
                        continue;
                    }
                    float VF = countAlt / (float)coverage;

                    if (chrSegments == null)
                    {
                        // Chromosome was matched through its alternative name: fetch its segments now.
                        chrSegments = segmentsByChromosome[chromosome];
                    }

                    // Binary search for the segment this variant hits:
                    int low  = 0;
                    int high = chrSegments.Count - 1;
                    while (low <= high)
                    {
                        int           mid       = (low + high) / 2;
                        CanvasSegment candidate = chrSegments[mid];
                        if (candidate.End < position) // CanvasSegment.End is already 1-based
                        {
                            low = mid + 1;
                        }
                        else if (candidate.Begin + 1 > position) // Convert CanvasSegment.Begin to 1-based by adding 1
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            candidate.VariantFrequencies.Add(VF);
                            candidate.VariantTotalCoverage.Add(coverage);
                            totalCoverage += coverage; // use only coverage information in segments
                            usableRecords++;
                            break;
                        }
                    }
                }
            }

            float meanCoverage = usableRecords > 0 ? totalCoverage / (float)usableRecords : 0;

            Console.WriteLine("{0} Loaded a total of {1} usable variant frequencies", DateTime.Now, usableRecords);
            return(meanCoverage);
        }
Esempio n. 20
0
        /// <summary>
        /// Iterates through a list of segments and merges those which have the same copy number call.
        /// Also, for segments smaller than MinimumCallSize, assimilate them into the neighbor with the best
        /// quality score.  Two consecutive segments are considered neighbors if they're on the same chromosome
        /// and the space between them doesn't overlap with any excluded intervals.
        /// </summary>
        /// <param name="segments">Segments to merge; replaced by the merged list.  An empty list is left untouched.</param>
        /// <param name="MinimumCallSize">Segments shorter than this (End - Begin) are candidates for assimilation into a neighbor.</param>
        /// <param name="excludedIntervals">Excluded intervals, passed to IsForbiddenInterval to decide whether a gap may be bridged.</param>
        public static void MergeSegmentsUsingExcludedIntervals(ref List <CanvasSegment> segments, int MinimumCallSize,
                                                               Dictionary <string, List <SampleGenomicBin> > excludedIntervals)
        {
            if (!segments.Any())
            {
                return;
            }

            // Assimilate short segments into the *best* available neighbor:
            List <CanvasSegment> mergedSegments = new List <CanvasSegment>();
            int segmentIndex = 0;

            while (segmentIndex < segments.Count)
            {
                // Segments that are long enough are kept as they are.
                if (segments[segmentIndex].End - segments[segmentIndex].Begin >= MinimumCallSize)
                {
                    mergedSegments.Add(segments[segmentIndex]);
                    segmentIndex++;
                    continue;
                }
                int    prevIndex = -1;
                double prevQ     = 0;
                // Look back for a segment.  The scan goes all the way down to index 0 so that the very first
                // segment of the list can also serve as the previous neighbor:
                for (int checkIndex = segmentIndex - 1; checkIndex >= 0; checkIndex--)
                {
                    // Stop, if you jump to another chromosome, or cross a forbidden interval:
                    if (segments[checkIndex].Chr != segments[segmentIndex].Chr)
                    {
                        break;
                    }
                    // Skip over other short segments; only a long-enough segment can assimilate this one.
                    if (segments[checkIndex].End - segments[checkIndex].Begin < MinimumCallSize)
                    {
                        continue;
                    }
                    if (IsForbiddenInterval(segments[checkIndex].Chr, segments[checkIndex].End, segments[segmentIndex].Begin, excludedIntervals))
                    {
                        break;
                    }
                    prevIndex = checkIndex;
                    prevQ     = segments[checkIndex].QScore;
                    break;
                }
                // Look forward for a segment:
                int    nextIndex = -1;
                double nextQ     = 0;
                for (int checkIndex = segmentIndex + 1; checkIndex < segments.Count; checkIndex++)
                {
                    if (segments[checkIndex].Chr != segments[segmentIndex].Chr)
                    {
                        break;
                    }
                    if (segments[checkIndex].End - segments[checkIndex].Begin < MinimumCallSize)
                    {
                        continue;
                    }
                    if (IsForbiddenInterval(segments[checkIndex].Chr, segments[segmentIndex].End, segments[checkIndex].Begin, excludedIntervals))
                    {
                        break;
                    }
                    nextIndex = checkIndex;
                    nextQ     = segments[checkIndex].QScore;
                    break;
                }

                // A neighbor only qualifies with a positive quality score; on a tie the previous one wins.
                if (prevQ > 0 && prevQ >= nextQ)
                {
                    // segments[prevIndex] assimilates segments[prevIndex+1...segmentIndex].
                    // Assimilation of previous segments was already done, so we just need to assimilate this one:
                    segments[prevIndex].MergeIn(segments[segmentIndex]);
                    segmentIndex++;
                    continue;
                }

                if (nextQ > 0)
                {
                    // segments[nextIndex] assimilates segments[segmentIndex...nextIndex - 1]
                    for (int tempIndex = segmentIndex; tempIndex < nextIndex; tempIndex++)
                    {
                        segments[nextIndex].MergeIn(segments[tempIndex]);
                    }
                    segmentIndex = nextIndex;
                    continue;
                }

                // No usable neighbor on either side: keep the short segment on its own.
                mergedSegments.Add(segments[segmentIndex]);
                segmentIndex++;
            }
            segments = mergedSegments;

            // Now, merge together adjacent segments with same calls!
            mergedSegments = new List <CanvasSegment>();
            CanvasSegment lastSegment = segments[0];

            mergedSegments.Add(lastSegment);
            segmentIndex = 1;
            while (segmentIndex < segments.Count)
            {
                // Assimilate an adjacent segment with the same copy number call and heterogeneity flag:
                if (lastSegment.CopyNumber == segments[segmentIndex].CopyNumber && lastSegment.Chr == segments[segmentIndex].Chr &&
                    !IsForbiddenInterval(lastSegment.Chr, lastSegment.End, segments[segmentIndex].Begin, excludedIntervals) &&
                    lastSegment.IsHeterogeneous == segments[segmentIndex].IsHeterogeneous)
                {
                    lastSegment.MergeIn(segments[segmentIndex]);
                    segmentIndex++;
                    continue;
                }
                lastSegment = segments[segmentIndex];
                mergedSegments.Add(segments[segmentIndex]);
                segmentIndex++;
            }
            segments = mergedSegments;
        }
Esempio n. 21
0
        /// <summary>
        /// Loads in data produced by CanvasPartition.exe.
        /// Each row is tab-separated and is read as: chromosome, bin start, bin end, bin count, segment index.
        /// Consecutive rows sharing a segment index are collapsed into one segment that spans from the
        /// first bin's start to the last bin's end and carries the per-bin counts.  Start/end confidence
        /// intervals are derived from half the width of the bins at each segment boundary.
        /// </summary>
        /// <param name="infile">Input file.</param>
        /// <returns>A list of segments.</returns>
        public static List<CanvasSegment> ReadSegments(string infile)
        {
            Console.WriteLine("{0} Read segments from {1}", DateTime.Now, infile);

            // The partition file is machine-generated, so numbers must be parsed independently of the
            // current thread culture (otherwise "1.5" is misread under comma-decimal locales).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            List<CanvasSegment> segments = new List<CanvasSegment>();

            string chr   = null;
            int    begin = -1;

            int             previousSegmentIndex = -1;
            int             previousBinStart     = 0;
            int             previousBinEnd       = 0;
            List<float>     counts         = new List<float>();
            Tuple<int, int> segmentStartCI = null;

            using (GzipReader reader = new GzipReader(infile))
            {
                string row = null;

                while ((row = reader.ReadLine()) != null)
                {
                    string[] fields = row.Split('\t');

                    int currentSegmentIndex = Convert.ToInt32(fields[4], invariant);
                    int newBinStart         = Convert.ToInt32(fields[1], invariant);
                    int newBinEnd           = Convert.ToInt32(fields[2], invariant);

                    // We've moved to a new segment
                    if (currentSegmentIndex != previousSegmentIndex)
                    {
                        bool abutsPrevious = (previousBinEnd == newBinStart);
                        int  halfNewBin    = (newBinEnd - newBinStart) / 2;

                        if (previousSegmentIndex != -1)
                        {
                            // Close out the segment that just ended.
                            // NOTE(review): counts is reused after Clear(); this assumes the CanvasSegment
                            // constructor copies the list rather than keeping the reference - TODO confirm.
                            CanvasSegment segment = new CanvasSegment(chr, begin, previousBinEnd, counts);

                            // Confidence interval for the end of the segment that just ended, based on the size
                            // of its last bin (and, if the segments abut, on the size of the next segment's first bin):
                            int CIEnd1 = -(previousBinEnd - previousBinStart) / 2;
                            int CIEnd2 = abutsPrevious ? halfNewBin : -CIEnd1;
                            segment.EndConfidenceInterval   = new Tuple<int, int>(CIEnd1, CIEnd2);
                            segment.StartConfidenceInterval = segmentStartCI;
                            segments.Add(segment);
                            counts.Clear();

                            // Confidence interval for the start of the segment that just started, based on the size
                            // of its first bin (and, if the segments abut, on the size of the previous segment's last bin):
                            int CIStart1 = abutsPrevious ? -(previousBinEnd - previousBinStart) / 2 : -halfNewBin;
                            segmentStartCI = new Tuple<int, int>(CIStart1, halfNewBin);
                        }
                        else
                        {
                            // Very first segment in the file: symmetric interval around its first bin.
                            segmentStartCI = new Tuple<int, int>(-halfNewBin, halfNewBin);
                        }
                        chr   = fields[0];
                        begin = newBinStart;
                        previousSegmentIndex = currentSegmentIndex;
                    }
                    previousBinStart = newBinStart;
                    previousBinEnd   = newBinEnd;

                    counts.Add(float.Parse(fields[3], invariant));
                }

                if (previousSegmentIndex != -1)
                {
                    // Add the last segment.
                    // NOTE(review): unlike every other segment, the last one never receives an
                    // EndConfidenceInterval - confirm whether downstream code relies on that.
                    CanvasSegment segment = new CanvasSegment(chr, begin, previousBinEnd, counts);
                    segments.Add(segment);
                    segment.StartConfidenceInterval = segmentStartCI;
                }
            }
            Console.WriteLine("{0} Loaded {1} segments", DateTime.Now, segments.Count);
            return segments;
        }
Esempio n. 22
0
 /// <summary>
 /// Asks the copy-number oracle, when one is configured, for the known CN of a segment.
 /// The oracle is expected to look for a known-CN interval covering at least half of the
 /// segment (that logic lives in the oracle and is not visible here).
 /// Returns -1 when no oracle is available; otherwise returns whatever the oracle reports.
 /// </summary>
 protected int GetKnownCNForSegment(CanvasSegment segment)
 {
     // Without an oracle there is no truth set to consult.
     return (CNOracle != null) ? CNOracle.GetKnownCNForSegment(segment) : -1;
 }
Esempio n. 23
0
        /// <summary>
        /// Iterates through a list of segments and merges those which have the same copy number call.
        /// Also, for segments smaller than MinimumCallSize, assimilate them into the neighbor with the best
        /// quality score.  Two consecutive segments are considered neighbors if they're on the same chromosome
        /// and the space between them is not too large.
        /// </summary>
        /// <param name="segments">
        /// Segments to process; replaced by the merged list on return.  An empty list is left untouched.
        /// NOTE(review): the neighbor search presumably expects segments ordered by chromosome and position - confirm.
        /// </param>
        /// <param name="MinimumCallSize">Segments with End - Begin below this value are folded into a neighbor.</param>
        /// <param name="maximumMergeSpan">Largest gap (in bases) across which two segments may be merged.</param>
        public static void MergeSegments(ref List<CanvasSegment> segments, int MinimumCallSize = 0,
                                         int maximumMergeSpan = 10000)
        {
            if (!segments.Any())
            {
                return;
            }

            // Pass 1: assimilate short segments into the *best* available neighbor.
            List<CanvasSegment> mergedSegments = new List<CanvasSegment>();
            int segmentIndex = 0;

            while (segmentIndex < segments.Count)
            {
                CanvasSegment current = segments[segmentIndex];
                if (current.End - current.Begin >= MinimumCallSize)
                {
                    // Large enough to stand on its own.
                    mergedSegments.Add(current);
                    segmentIndex++;
                    continue;
                }

                // Look back for the nearest full-size segment.
                int    prevIndex = -1;
                double prevQ     = -1;
                for (int checkIndex = segmentIndex - 1; checkIndex >= 0; checkIndex--)
                {
                    CanvasSegment candidate = segments[checkIndex];
                    // Stop if we jump to another chromosome:
                    if (candidate.Chr != current.Chr)
                    {
                        break;
                    }
                    // Skip over other short segments:
                    if (candidate.End - candidate.Begin < MinimumCallSize)
                    {
                        continue;
                    }
                    // Stop if the gap is wider than the permitted merge span:
                    if (current.Begin - candidate.End > maximumMergeSpan)
                    {
                        break;
                    }
                    prevIndex = checkIndex;
                    prevQ     = candidate.QScore;
                    break;
                }

                // Look forward for the nearest full-size segment.
                int    nextIndex = -1;
                double nextQ     = -1;
                for (int checkIndex = segmentIndex + 1; checkIndex < segments.Count; checkIndex++)
                {
                    CanvasSegment candidate = segments[checkIndex];
                    if (candidate.Chr != current.Chr)
                    {
                        break;
                    }
                    if (candidate.End - candidate.Begin < MinimumCallSize)
                    {
                        continue;
                    }
                    if (candidate.Begin - current.End > maximumMergeSpan)
                    {
                        // Mirror the backward scan and stop here.  With position-ordered input every later
                        // candidate is even farther away, and a segment skipped for distance must never be
                        // swept into a later neighbor by the assimilation loop below.
                        break;
                    }
                    nextIndex = checkIndex;
                    nextQ     = candidate.QScore;
                    break;
                }

                // The previous neighbor wins ties.
                if (prevQ >= 0 && prevQ >= nextQ)
                {
                    // segments[prevIndex] assimilates segments[prevIndex+1...segmentIndex].
                    // Assimilation of previous segments was already done, so we just need to assimilate this one:
                    segments[prevIndex].MergeIn(current);
                    segmentIndex++;
                    continue;
                }

                if (nextQ >= 0)
                {
                    // segments[nextIndex] assimilates segments[segmentIndex...nextIndex - 1]
                    for (int tempIndex = segmentIndex; tempIndex < nextIndex; tempIndex++)
                    {
                        segments[nextIndex].MergeIn(segments[tempIndex]);
                    }
                    // Resume at the neighbor itself so that it is added to the output on the next iteration.
                    segmentIndex = nextIndex;
                    continue;
                }

                // No eligible neighbor on either side: keep the short segment as-is.
                mergedSegments.Add(current);
                segmentIndex++;
            }
            segments = mergedSegments;

            // Pass 2: merge together adjacent segments with same calls!
            // NOTE(review): pass 1 accepts a gap equal to maximumMergeSpan, whereas this pass requires the gap
            // to be strictly smaller - confirm which boundary is intended.
            mergedSegments = new List<CanvasSegment>();
            CanvasSegment lastSegment = segments[0];

            mergedSegments.Add(lastSegment);
            segmentIndex = 1;
            while (segmentIndex < segments.Count)
            {
                // Assimilate an adjacent segment with the same copy number call:
                if (lastSegment.copyNumber == segments[segmentIndex].copyNumber && lastSegment.Chr == segments[segmentIndex].Chr &&
                    segments[segmentIndex].Begin - lastSegment.End < maximumMergeSpan)
                {
                    lastSegment.MergeIn(segments[segmentIndex]);
                    segmentIndex++;
                    continue;
                }
                lastSegment = segments[segmentIndex];
                mergedSegments.Add(segments[segmentIndex]);
                segmentIndex++;
            }
            segments = mergedSegments;
        }
Esempio n. 24
0
        /// <summary>
        /// Developer debug method: ROC curve data generation
        /// - Report all intervals, associated QScores and QScore predictor values to an extended report output file
        /// - Report called (i.e. TP+FP) intervals grouped by QScore
        /// - Generate 2 ROC outputs
        ///   - ROC_intervals: FP vs TP rate, unit=1 interval
        ///   - ROC_bases:     FP vs TP rate, unit=1 base
        ///   (Note: In both cases, we ignore intervals shorter than 1kb as most of them are due to imprecise ends of segments, which we don't want to give any weight to)
        /// All files are written into this.OutputFolder with a "qscore_{method}_" prefix.
        /// </summary>
        /// <param name="qscoreMethod">QScore method used to score each matched segment.</param>
        /// <param name="resegmentedKnownCN">Truth-set intervals, keyed by chromosome name.</param>
        private void GenerateReportAndRocDataForQscoreMethod(CanvasSegment.QScoreMethod qscoreMethod, Dictionary<string, List<CNInterval>> resegmentedKnownCN)
        {
            // Create map interval->{segment+qscore}, ignoring intervals shorter than 1kb
            Dictionary<CNInterval, Tuple<CanvasSegment, int>> Interval2Segment = new Dictionary<CNInterval, Tuple<CanvasSegment, int>>();
            foreach (KeyValuePair<string, List<CNInterval>> chromosomeEntry in resegmentedKnownCN)
            {
                string chr = chromosomeEntry.Key;
                foreach (CNInterval interval in chromosomeEntry.Value)
                {
                    // The length test does not depend on the segment, so decide before scanning all segments.
                    if (interval.End - interval.Start < 1000)
                    {
                        continue;
                    }
                    foreach (CanvasSegment segment in this.Segments)
                    {
                        // A segment matches when it shares either endpoint with the interval; if several
                        // segments match, the last one encountered is kept.
                        if (segment.Chr == chr && (segment.Begin == interval.Start || segment.End == interval.End))
                        {
                            Interval2Segment[interval] = new Tuple<CanvasSegment, int>(segment, segment.ComputeQScore(qscoreMethod));
                        }
                    }
                }
            }

            // Classify intervals by QScore (the list index is the QScore)
            List<List<CNInterval>> intervalsByQScore = new List<List<CNInterval>>();
            foreach (KeyValuePair<CNInterval, Tuple<CanvasSegment, int>> matched in Interval2Segment)
            {
                int qscore = matched.Value.Item2;
                // Resize list to hold this qscore's entries
                while (qscore >= intervalsByQScore.Count)
                {
                    intervalsByQScore.Add(new List<CNInterval>());
                }
                intervalsByQScore[qscore].Add(matched.Key);
            }

            // Output data as ExtendedCallsVersusKnownCN.txt
            Array predictorIds = CanvasSegment.QScorePredictor.GetValues(typeof(CanvasSegment.QScorePredictor));
            string debugPath = Path.Combine(this.OutputFolder, "qscore_" + qscoreMethod.ToString() + "_ExtendedCallsVersusKnownCN.txt");
            using (StreamWriter writer = new StreamWriter(debugPath))
            {
                writer.Write("#Chr\tBegin\tEnd\tTruthSetCN\tCalledCN\tMajorChromCount\tQScore\tInfo");
                foreach (CanvasSegment.QScorePredictor predictorId in predictorIds)
                {
                    writer.Write("\tPredictor_{0}", predictorId.ToString());
                }
                writer.WriteLine("");

                foreach (KeyValuePair<string, List<CNInterval>> chromosomeEntry in resegmentedKnownCN)
                {
                    string chr = chromosomeEntry.Key;
                    foreach (CNInterval interval in chromosomeEntry.Value)
                    {
                        Tuple<CanvasSegment, int> match;
                        if (Interval2Segment.TryGetValue(interval, out match))
                        {
                            CanvasSegment segment = match.Item1;
                            int qscore = match.Item2;
                            string info = (interval.CN == segment.CopyNumber) ? "Correct" : "Incorrect";
                            writer.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", chr, interval.Start, interval.End, interval.CN, segment.CopyNumber, segment.MajorChromosomeCount, qscore, info);
                            foreach (CanvasSegment.QScorePredictor predictorId in predictorIds)
                            {
                                writer.Write("\t{0}", segment.GetQScorePredictor(predictorId));
                            }
                            writer.WriteLine("");
                        }
                        else
                        {
                            // No segment matched this interval: emit -1 placeholders.  The QScore placeholder
                            // keeps "Missing" in the Info column declared by the header (it previously landed
                            // in the QScore column).  Predictor columns are left off for these rows.
                            string info = "Missing";
                            int CN = -1;
                            int majorChromosomeCount = -1;
                            int missingQScore = -1;
                            writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", chr, interval.Start, interval.End, interval.CN, CN, majorChromosomeCount, missingQScore, info);
                        }
                    }
                }
            }

            // Output data by QScore
            debugPath = Path.Combine(this.OutputFolder, "qscore_" + qscoreMethod + "_cnaPerQscore.txt");
            using (StreamWriter writer = new StreamWriter(debugPath))
            {
                // The header lists exactly the eight values written per row (it used to advertise
                // MedianMAF/MedianCoverage columns that were never emitted).
                writer.WriteLine("#Chr\tBegin\tEnd\tTruthSetCN\tCalledCN\tMajorChromCount\tQScore\tInfo");
                for (int qscore = 0; qscore < intervalsByQScore.Count; qscore++)
                {
                    foreach (CNInterval interval in intervalsByQScore[qscore])
                    {
                        CanvasSegment segment = Interval2Segment[interval].Item1;
                        string info = (interval.CN == segment.CopyNumber) ? "Correct" : "Incorrect";
                        writer.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", segment.Chr, interval.Start, interval.End, interval.CN, segment.CopyNumber, segment.MajorChromosomeCount, qscore, info);
                    }
                }
            }

            // ROC output per interval
            debugPath = Path.Combine(this.OutputFolder, "qscore_" + qscoreMethod + "_ROC_intervals.txt");
            GenerateRocOutput(debugPath, intervalsByQScore, Interval2Segment, false, false);

            // ROC output per base
            debugPath = Path.Combine(this.OutputFolder, "qscore_" + qscoreMethod + "_ROC_bases.txt");
            GenerateRocOutput(debugPath, intervalsByQScore, Interval2Segment, true, false);
        }
Esempio n. 25
0
        /// <summary>
        /// Loads in data produced by CanvasPartition.exe.
        /// Each row is tab-separated and is read as: chromosome, bin start, bin end, bin count, segment id.
        /// Consecutive rows sharing a segment id are collapsed into one segment spanning from the first
        /// bin's start to the last bin's end, carrying the per-bin counts.
        /// </summary>
        /// <param name="infile">Input file.</param>
        /// <returns>A list of segments.</returns>
        public static List<CanvasSegment> ReadSegments(string infile)
        {
            Console.WriteLine("{0} Read segments from {1}", DateTime.Now, infile);

            // The partition file is machine-generated, so numbers must be parsed independently of the
            // current thread culture (otherwise "1.5" is misread under comma-decimal locales).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            List<CanvasSegment> segments = new List<CanvasSegment>();

            string chr = null;
            int begin = -1;
            int end = -1;
            int bin = -1;   // id of the segment currently being accumulated; -1 until the first row is read
            List<float> counts = new List<float>();

            using (GzipReader reader = new GzipReader(infile))
            {
                string row = null;

                while ((row = reader.ReadLine()) != null)
                {
                    string[] fields = row.Split('\t');

                    int currentBin = Convert.ToInt32(fields[4], invariant);

                    // We've moved to a new segment
                    if (currentBin != bin)
                    {
                        // Flush the segment accumulated so far (nothing to flush on the very first row).
                        if (bin != -1)
                        {
                            // NOTE(review): counts is reused after Clear(); this assumes the CanvasSegment
                            // constructor copies the list rather than keeping the reference - TODO confirm.
                            CanvasSegment segment = new CanvasSegment(chr, begin, end, counts);
                            segments.Add(segment);
                            counts.Clear();
                        }

                        chr = fields[0];
                        begin = Convert.ToInt32(fields[1], invariant);
                        bin = currentBin;
                    }

                    // Every row extends the current segment to the end of its bin.
                    end = Convert.ToInt32(fields[2], invariant);
                    counts.Add(float.Parse(fields[3], invariant));
                }

                if (bin != -1)
                {
                    // Add the last segment
                    CanvasSegment segment = new CanvasSegment(chr, begin, end, counts);
                    segments.Add(segment);
                }
            }
            Console.WriteLine("{0} Loaded {1} segments", DateTime.Now, segments.Count);
            return segments;
        }