Esempio n. 1
0
File: IO.cs Progetto: abladon/canvas
        /// <summary>
        /// Reads a tab-separated bin file through GzipReader and builds one GenomicBin per line.
        /// </summary>
        /// <param name="infile">Path of the file to read.</param>
        /// <returns>The bins, in the order they appear in the file.</returns>
        public static List<GenomicBin> ReadFromTextFile(string infile)
        {
            var result = new List<GenomicBin>();

            using (var input = new GzipReader(infile))
            {
                for (string line = input.ReadLine(); line != null; line = input.ReadLine())
                {
                    // Column layout (0-4): chromosome, start, stop, count, gc.
                    string[] columns = line.Split('\t');

                    string chromosome = columns[0];
                    int binStart = int.Parse(columns[1]);
                    int binStop = int.Parse(columns[2]);
                    float binCount = float.Parse(columns[3]); // counts may be fractional, hence float
                    int gcValue = int.Parse(columns[4]);

                    result.Add(new GenomicBin(chromosome, binStart, binStop, gcValue, binCount));
                }
            }

            return result;
        }
Esempio n. 2
0
 /// <summary>
 /// Loads per-chromosome ploidy intervals from a tab-separated file read through GzipReader.
 /// A line starting with "##ExpectedSexChromosomeKaryotype" is stored (trimmed) as the header line;
 /// empty lines and all other lines starting with '#' are ignored.
 /// </summary>
 /// <param name="filePath">Path of the ploidy file.</param>
 /// <returns>The populated PloidyInfo.</returns>
 public static PloidyInfo LoadPloidyFromBedFile(string filePath)
 {
     var result = new PloidyInfo();
     int intervalCount = 0;
     using (var input = new GzipReader(filePath))
     {
         string line;
         while ((line = input.ReadLine()) != null)
         {
             if (line.StartsWith("##ExpectedSexChromosomeKaryotype"))
             {
                 result.HeaderLine = line.Trim();
             }
             else if (line.Length != 0 && line[0] != '#')
             {
                 string[] columns = line.Split('\t');
                 string chrom = columns[0];
                 if (!result.PloidyByChromosome.ContainsKey(chrom))
                 {
                     result.PloidyByChromosome[chrom] = new List<PloidyInterval>();
                 }
                 // Columns 1 and 2 hold the interval bounds; the ploidy is read from column 4.
                 var interval = new PloidyInterval
                 {
                     Start = int.Parse(columns[1]),
                     End = int.Parse(columns[2]),
                     Ploidy = int.Parse(columns[4])
                 };
                 result.PloidyByChromosome[chrom].Add(interval);
                 intervalCount++;
             }
         }
     }
     Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", intervalCount, result.PloidyByChromosome.Keys.Count);
     return result;
 }
Esempio n. 3
0
 /// <summary>
 /// Uncompresses a gzip file (e.g. foo.gz to foo) by copying it line by line.
 /// Every line is written back with a "\n" terminator.
 /// This will probably fail if there are no newlines in the file and the entire file cannot fit into memory.
 /// TODO: implement GzipReader.ReadBytes so that we don't need to use ReadLine/WriteLine
 /// </summary>
 /// <param name="sourcePath">Path of the compressed input file.</param>
 /// <param name="targetPath">Path of the uncompressed output file.</param>
 public static void UncompressFile(string sourcePath, string targetPath)
 {
     using (var output = new StreamWriter(targetPath))
     using (var input = new GzipReader(sourcePath))
     {
         output.NewLine = "\n";
         string line;
         while ((line = input.ReadLine()) != null)
         {
             output.WriteLine(line);
         }
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Merges the binned counts of one or more samples into a single file.
        /// With exactly one sample the file is copied (if it exists). Otherwise the count column (index 3) of the
        /// first sample's file is replaced, line by line, with the weighted average over all samples, where each
        /// sample's weight is proportional to 1 / (its on-target median bin count).
        /// </summary>
        /// <param name="binnedPaths">Paths of the per-sample binned files. Counts of every sample are looked up by the
        /// line index of the first file, so all files must list the same bins in the same order.</param>
        /// <param name="mergedBinnedPath">Path of the merged output file.</param>
        /// <param name="manifest">Optional manifest handed to BinCounts when computing the on-target median.</param>
        private static void GetWeightedAverageBinCount(IEnumerable<string> binnedPaths, string mergedBinnedPath,
            NexteraManifest manifest = null)
        {
            // Materialize once so a lazy sequence is not re-enumerated by Count/First/indexing.
            List<string> paths = binnedPaths.ToList();
            int sampleCount = paths.Count;
            if (sampleCount == 1) // copy file
            {
                string onlyPath = paths[0];
                if (File.Exists(onlyPath))
                {
                    if (File.Exists(mergedBinnedPath)) { File.Delete(mergedBinnedPath); }
                    File.Copy(onlyPath, mergedBinnedPath);
                }
                return;
            }

            // merge normal samples
            double[] weights = new double[sampleCount];
            List<double>[] binCountsBySample = new List<double>[sampleCount];
            for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
            {
                var binCounts = new BinCounts(paths[sampleIndex], manifest: manifest);
                List<double> counts = binCounts.AllCounts;
                // If a manifest is available, get the median of bins overlapping the targeted regions only.
                // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                double median = binCounts.OnTargetMedianBinCount;
                weights[sampleIndex] = median > 0 ? 1.0 / median : 0;
                binCountsBySample[sampleIndex] = counts;
            }

            // Normalize so the weights sum to 1. When every sample has a non-positive median all raw weights
            // are 0; dividing by that sum would turn every output count into NaN, so use equal weights instead.
            double weightSum = weights.Sum();
            for (int i = 0; i < sampleCount; i++)
            {
                weights[i] = weightSum > 0 ? weights[i] / weightSum : 1.0 / sampleCount;
            }

            // Compute the weighted average of bin counts across samples.
            // First() keeps the original InvalidOperationException for an empty path list.
            using (GzipReader reader = new GzipReader(paths.First()))
            using (GzipWriter writer = new GzipWriter(mergedBinnedPath))
            {
                string line;
                int lineIdx = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] toks = line.Split('\t');
                    double weightedBinCount = 0;
                    for (int i = 0; i < sampleCount; i++) { weightedBinCount += weights[i] * binCountsBySample[i][lineIdx]; }
                    toks[3] = String.Format("{0}", weightedBinCount);
                    writer.WriteLine(String.Join("\t", toks));
                    lineIdx++;
                }
            }
        }
Esempio n. 5
0
		/// <summary>
		/// Uncompresses a gzip file line by line into <paramref name="outputPath"/>, ending each line with "\n".
		/// Does nothing when <paramref name="tempPath"/> does not exist.
		/// </summary>
		/// <param name="tempPath">Path of the compressed input file.</param>
		/// <param name="outputPath">Path of the uncompressed output file.</param>
		public static void Uncompress(string tempPath, string outputPath)
		{
			if (File.Exists(tempPath))
			{
				using (var input = new GzipReader(tempPath))
				using (var output = new StreamWriter(outputPath))
				{
					output.NewLine = "\n";
					for (string line = input.ReadLine(); line != null; line = input.ReadLine())
					{
						output.WriteLine(line);
					}
				}
			}
		}
Esempio n. 6
0
        /// <summary>
        /// Reads the tab-separated input at InputBinPath and fills StartByChr, EndByChr and ScoreByChr
        /// with one array per chromosome, in file order.
        /// Assumes that the rows are sorted by start position in ascending order.
        /// On any exception the error is written to stderr and the process exits with code 1.
        /// </summary>
        private void ReadBEDInput()
        {
            try
            {
                var starts = new Dictionary<string, List<uint>>();
                var ends = new Dictionary<string, List<uint>>();
                var scores = new Dictionary<string, List<double>>();

                // The using statement closes the GzipReader when the block is left.
                using (var input = new GzipReader(this.InputBinPath))
                {
                    for (string row = input.ReadLine(); row != null; row = input.ReadLine())
                    {
                        string[] columns = row.Split('\t');
                        string chrom = columns[Segmentation.idxChr].Trim();

                        // The three dictionaries always share the same key set, so one lookup decides for all.
                        List<uint> chromStarts;
                        if (!starts.TryGetValue(chrom, out chromStarts))
                        {
                            chromStarts = new List<uint>();
                            starts.Add(chrom, chromStarts);
                            ends.Add(chrom, new List<uint>());
                            scores.Add(chrom, new List<double>());
                        }

                        chromStarts.Add(Convert.ToUInt32(columns[Segmentation.idxStart].Trim()));
                        ends[chrom].Add(Convert.ToUInt32(columns[Segmentation.idxEnd].Trim()));
                        scores[chrom].Add(Convert.ToDouble(columns[this.idxScore].Trim()));
                    }

                    foreach (string name in starts.Keys)
                    {
                        this.StartByChr[name] = starts[name].ToArray();
                        this.EndByChr[name] = ends[name].ToArray();
                        this.ScoreByChr[name] = scores[name].ToArray();
                    }
                }
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("File {0} could not be read:", this.InputBinPath);
                Console.Error.WriteLine(e.Message);
                Environment.Exit(1);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Loads the "oracle" of known copy numbers from a VCF (read through GzipReader) into KnownCN,
        /// grouped by chromosome. Empty lines and lines starting with '#' are skipped.
        /// The copy number is taken from the INFO "CN=" entry (rounded, with X.5 rounded up) and is overridden
        /// by a "CN" FORMAT field when both FORMAT and sample columns are present; the interval end is taken
        /// from the INFO "END=" entry. Records with fewer than 8 columns, or without a usable end or copy
        /// number, are reported on the console and skipped.
        /// </summary>
        /// <param name="oracleVCFPath">Path of the VCF holding the known copy numbers.</param>
        protected void LoadKnownCNVCF(string oracleVCFPath)
        {
            bool stripChr = false;

            // Load our "oracle" of known copy numbers:
            this.KnownCN = new Dictionary<string, List<CNInterval>>();
            int count = 0;
            using (GzipReader reader = new GzipReader(oracleVCFPath))
            {
                while (true)
                {
                    string fileLine = reader.ReadLine();
                    if (fileLine == null) break;
                    if (fileLine.Length == 0 || fileLine[0] == '#') continue;
                    string[] bits = fileLine.Split('\t');
                    // The INFO column (index 7) is required below; report truncated lines instead of crashing.
                    if (bits.Length < 8)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                        continue;
                    }
                    string chromosome = bits[0];
                    if (stripChr) chromosome = chromosome.Replace("chr", "");
                    if (!KnownCN.ContainsKey(chromosome)) KnownCN[chromosome] = new List<CNInterval>();
                    CNInterval interval = new CNInterval();
                    interval.Start = int.Parse(bits[1]);
                    interval.CN = -1; // sentinel: no copy number found yet
                    string[] infoBits = bits[7].Split(';');
                    foreach (string subBit in infoBits)
                    {
                        if (subBit.StartsWith("CN="))
                        {
                            float tempCN = float.Parse(subBit.Substring(3));
                            if (subBit.EndsWith(".5"))
                            {
                                interval.CN = (int)Math.Round(tempCN + 0.1); // round X.5 up to X+1
                            }
                            else
                            {
                                interval.CN = (int)Math.Round(tempCN); // Round off
                            }
                        }
                        if (subBit.StartsWith("END="))
                        {
                            interval.End = int.Parse(subBit.Substring(4));
                        }
                    }
                    // Parse CN from Canvas output. Both the FORMAT column (8) and the sample column (9)
                    // are read, so the line needs at least 10 columns.
                    if (bits.Length > 9)
                    {
                        string[] formatKeys = bits[8].Split(':');
                        string[] sampleValues = bits[9].Split(':');
                        for (int keyIndex = 0; keyIndex < formatKeys.Length; keyIndex++)
                        {
                            // The sample column may carry fewer fields than FORMAT declares.
                            if (formatKeys[keyIndex] == "CN" && keyIndex < sampleValues.Length)
                            {
                                interval.CN = int.Parse(sampleValues[keyIndex]);
                            }
                        }
                    }
                    if (interval.End == 0 || interval.CN < 0)
                    {
                        Console.WriteLine("Error - bogus record!");
                        Console.WriteLine(fileLine);
                    }
                    else
                    {
                        KnownCN[chromosome].Add(interval);
                        count++;
                    }
                }
            }
            Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
        }
Esempio n. 8
0
 /// <summary>
 /// Scans the leading '#' lines of a VCF for a "##reference=" entry and passes its value to SafeGetReference.
 /// Scanning stops at the first line that does not start with '#'. If several "##reference=" lines
 /// exist the last one wins; if none exists, null is passed to SafeGetReference.
 /// </summary>
 /// <param name="vcfPath">Path of the VCF file to inspect.</param>
 public static IGenomesReferencePath GetReferenceFromVcfHeader(string vcfPath)
 {
     const string referenceTag = "##reference=";
     string reference = null;
     using (var input = new GzipReader(vcfPath))
     {
         for (string line = input.ReadLine();
              line != null && line.StartsWith("#");
              line = input.ReadLine())
         {
             if (line.StartsWith(referenceTag))
             {
                 reference = line.Substring(referenceTag.Length);
             }
         }
     }
     return SafeGetReference(reference);
 }
Esempio n. 9
0
        /// <summary>
        /// Runs CanvasSNV once per autosome/allosome of the callset's genome and concatenates the
        /// per-chromosome outputs into callset.VfSummaryPath.
        /// A missing per-chromosome output is reported on the console and skipped. Of all lines starting
        /// with '#', only the first one encountered is written to the summary.
        /// </summary>
        /// <param name="callset">Callset providing the BAM, normal VCF, temp folder and output location.</param>
        protected void InvokeCanvasSnv(CanvasCallset callset)
        {
            var jobs = new List<UnitOfWork>();
            var chromosomeOutputs = new List<string>();
            GenomeMetadata metadata = callset.GenomeMetadata;

            string tumorBam = callset.Bam.BamFile.FullName;
            string normalVcf = callset.NormalVcfPath.FullName;
            foreach (GenomeMetadata.SequenceMetadata sequence in metadata.Sequences)
            {
                // Only invoke for autosomes + allosomes;
                // the mitochondrial chromosome, extra contigs and decoys are skipped.
                bool isAllosome = sequence.Type == GenomeMetadata.SequenceType.Allosome;
                if (!(isAllosome || sequence.IsAutosome()))
                    continue;

                var job = new UnitOfWork();
                job.ExecutablePath = Path.Combine(_canvasFolder, "CanvasSNV.exe");
                if (CrossPlatform.IsThisMono())
                {
                    // Under Mono the .exe becomes the first argument and mono itself is the executable.
                    job.CommandLine = job.ExecutablePath;
                    job.ExecutablePath = Utilities.GetMonoPath();
                }

                string chromosomeOutput = Path.Combine(callset.TempFolder, string.Format("{0}-{1}.SNV.txt.gz", sequence.Name, callset.Id));
                chromosomeOutputs.Add(chromosomeOutput);
                job.CommandLine += $" {sequence.Name} {normalVcf} {tumorBam} {chromosomeOutput}";
                if (_customParameters.ContainsKey("CanvasSNV"))
                {
                    job.CommandLine = Utilities.MergeCommandLineOptions(job.CommandLine, _customParameters["CanvasSNV"], true);
                }
                job.LoggingFolder = _workManager.LoggingFolder.FullName;
                job.LoggingStub = string.Format("CanvasSNV-{0}-{1}", callset.Id, sequence.Name);
                jobs.Add(job);
            }
            Console.WriteLine("Invoking {0} processor jobs...", jobs.Count);

            // Invoke CanvasSNV jobs:
            Console.WriteLine(">>>CanvasSNV start...");
            _workManager.DoWorkParallelThreads(jobs);
            Console.WriteLine(">>>CanvasSNV complete!");

            // Concatenate CanvasSNV results:
            using (var summary = new GzipWriter(callset.VfSummaryPath))
            {
                bool sawHeader = false;
                foreach (string path in chromosomeOutputs)
                {
                    if (!File.Exists(path))
                    {
                        Console.WriteLine("Error: Expected output file not found at {0}", path);
                        continue;
                    }
                    using (var input = new GzipReader(path))
                    {
                        string line;
                        while ((line = input.ReadLine()) != null)
                        {
                            bool isHeader = line.Length > 0 && line[0] == '#';
                            if (isHeader && sawHeader) continue; // header already emitted once
                            if (isHeader) sawHeader = true;
                            summary.WriteLine(line);
                        }
                    }
                }
            }
        }
Esempio n. 10
0
File: IO.cs Progetto: abladon/canvas
        /// <summary>
        /// Parses the output of CanvasSNV and records each variant frequency (and its total coverage)
        /// on the segment that contains the variant position.
        /// Lines with fewer than 6 columns are reported on stderr and skipped; variants with
        /// ref + alt coverage below 10, or on chromosomes without segments, are ignored.
        /// </summary>
        /// <param name="variantFrequencyFile">Path of the CanvasSNV output file.</param>
        /// <param name="segments">Segments to annotate.</param>
        /// <returns>Mean ref + alt coverage over the variants that fell into a segment (0 if there were none).</returns>
        public static float LoadVariantFrequencies(string variantFrequencyFile, List<CanvasSegment> segments)
        {
            Console.WriteLine("{0} Load variant frequencies from {1}", DateTime.Now, variantFrequencyFile);
            Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(segments);
            Dictionary<string, string> alternativeNames = GetChromosomeAlternativeNames(segmentsByChromosome.Keys);
            int usableRecords = 0;
            long coverageSum = 0;
            using (var input = new GzipReader(variantFrequencyFile))
            {
                string line;
                while ((line = input.ReadLine()) != null)
                {
                    if (line.Length == 0 || line[0] == '#') continue; // Skip headers
                    string[] columns = line.Split('\t');
                    if (columns.Length < 6)
                    {
                        Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, line);
                        continue;
                    }

                    string chromosome = columns[0];
                    if (!segmentsByChromosome.ContainsKey(chromosome))
                    {
                        // Fall back to the alternative chromosome name, if one is known.
                        string alternative;
                        if (!alternativeNames.TryGetValue(chromosome, out alternative)) continue;
                        chromosome = alternative;
                    }

                    int position = int.Parse(columns[1]); // 1-based (from the input VCF to Canvas SNV)
                    int countRef = int.Parse(columns[4]);
                    int countAlt = int.Parse(columns[5]);
                    int coverage = countRef + countAlt;
                    if (coverage < 10) continue;
                    float frequency = countAlt / (float)coverage;

                    // Binary search for the segment this variant hits:
                    List<CanvasSegment> chrSegments = segmentsByChromosome[chromosome];
                    int low = 0;
                    int high = chrSegments.Count - 1;
                    while (low <= high)
                    {
                        int mid = (low + high) / 2;
                        CanvasSegment candidate = chrSegments[mid];
                        if (candidate.End < position) // CanvasSegment.End is already 1-based
                        {
                            low = mid + 1;
                        }
                        else if (candidate.Begin + 1 > position) // Convert CanvasSegment.Begin to 1-based by adding 1
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            candidate.VariantFrequencies.Add(frequency);
                            candidate.VariantTotalCoverage.Add(coverage);
                            coverageSum += coverage; // use only coverage information in segments
                            usableRecords++;
                            break;
                        }
                    }
                }
            }

            float meanCoverage = 0;
            if (usableRecords > 0)
                meanCoverage = coverageSum / Math.Max(1f, usableRecords);
            Console.WriteLine("{0} Loaded a total of {1} usable variant frequencies", DateTime.Now, usableRecords);
            return meanCoverage;
        }
Esempio n. 11
0
        /// <summary>
        /// Loads the data produced by CanvasPartition.exe. Consecutive rows that share the same value in
        /// column 4 are merged into one segment spanning from the first row's start to the last row's end.
        /// </summary>
        /// <param name="infile">Input file.</param>
        /// <returns>A list of segments.</returns>
        public static List<CanvasSegment> ReadSegments(string infile)
        {
            Console.WriteLine("{0} Read segments from {1}", DateTime.Now, infile);
            var result = new List<CanvasSegment>();

            const int noSegment = -1; // marks "no row has been read yet"
            string segmentChr = null;
            int segmentBegin = -1;
            int segmentEnd = -1;
            int segmentId = noSegment;
            var segmentCounts = new List<float>();

            using (var input = new GzipReader(infile))
            {
                for (string line = input.ReadLine(); line != null; line = input.ReadLine())
                {
                    string[] columns = line.Split('\t');
                    int rowSegmentId = Convert.ToInt32(columns[4]);

                    if (rowSegmentId != segmentId)
                    {
                        // A new segment starts: flush the one collected so far, if any.
                        if (segmentId != noSegment)
                        {
                            result.Add(new CanvasSegment(segmentChr, segmentBegin, segmentEnd, segmentCounts));
                            // NOTE(review): the list is reused after this call, which presumes that
                            // CanvasSegment copies the counts it is given - confirm against its constructor.
                            segmentCounts.Clear();
                        }

                        segmentChr = columns[0];
                        segmentBegin = Convert.ToInt32(columns[1]);
                        segmentId = rowSegmentId;
                    }

                    // Every row extends the current segment.
                    segmentEnd = Convert.ToInt32(columns[2]);
                    segmentCounts.Add(float.Parse(columns[3]));
                }

                // Add the last segment
                if (segmentId != noSegment)
                {
                    result.Add(new CanvasSegment(segmentChr, segmentBegin, segmentEnd, segmentCounts));
                }
            }

            Console.WriteLine("{0} Loaded {1} segments", DateTime.Now, result.Count);
            return result;
        }
Esempio n. 12
0
        /// <summary>
        /// Reads the count column (index 3) of every tab-separated line of a binned file.
        /// </summary>
        /// <param name="binnedPath">Path of the binned file.</param>
        /// <param name="binCounts">Receives the counts in file order.</param>
        private static void LoadBinCounts(string binnedPath, out List<double> binCounts)
        {
            binCounts = new List<double>();

            using (var input = new GzipReader(binnedPath))
            {
                for (string row = input.ReadLine(); row != null; row = input.ReadLine())
                {
                    string[] columns = row.Split('\t');
                    binCounts.Add(double.Parse(columns[3]));
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Reads the count column (index 3) of every line of a binned file and records the indices of the
        /// bins that overlap a manifest region on the same chromosome.
        /// The region cursor only moves forward within a chromosome, so bins and regions are expected to be
        /// sorted by position.
        /// </summary>
        /// <param name="binnedPath">Path of the binned file.</param>
        /// <param name="manifest">Manifest supplying the targeted regions.</param>
        /// <param name="binCounts">Receives the counts in file order.</param>
        /// <param name="onTargetIndices">Receives the zero-based line indices of bins overlapping a region.</param>
        private static void LoadBinCounts(string binnedPath, NexteraManifest manifest, out List<double> binCounts,
            out List<int> onTargetIndices)
        {
            binCounts = new List<double>();
            onTargetIndices = new List<int>();

            var regionsByChrom = manifest.GetManifestRegionsByChromosome();
            string activeChrom = null;
            List<NexteraManifest.ManifestRegion> activeRegions = null; // 1-based regions
            int cursor = -1;
            using (var input = new GzipReader(binnedPath))
            {
                int binIndex = 0;
                for (string row = input.ReadLine(); row != null; row = input.ReadLine(), binIndex++)
                {
                    string[] columns = row.Split('\t');
                    string chrom = columns[0];
                    int binStart = int.Parse(columns[1]); // 0-based, inclusive
                    int binStop = int.Parse(columns[2]); // 0-based, exclusive
                    if (activeChrom != chrom)
                    {
                        // Chromosome changed: switch to its region list (if any) and restart the cursor.
                        activeChrom = chrom;
                        if (regionsByChrom.ContainsKey(activeChrom))
                        {
                            activeRegions = regionsByChrom[activeChrom];
                            cursor = 0;
                        }
                        else
                        {
                            activeRegions = null;
                        }
                    }

                    if (activeRegions != null)
                    {
                        // Skip regions that end before this bin starts (bin start converted to 1-based).
                        while (cursor < activeRegions.Count && activeRegions[cursor].End < binStart + 1)
                        {
                            cursor++;
                        }
                        // The 0-based exclusive stop equals the 1-based inclusive end of the bin.
                        if (cursor < activeRegions.Count && activeRegions[cursor].Start <= binStop) // overlap
                        {
                            onTargetIndices.Add(binIndex);
                        }
                    }

                    binCounts.Add(double.Parse(columns[3]));
                }
            }
        }
Esempio n. 14
0
 /// <summary>
 ///     Opens the file by creating a GzipReader over it.
 ///     IsOpen is set only after the reader has been created, so a failed open
 ///     (exception from the GzipReader constructor) does not leave the object marked as open.
 /// </summary>
 /// <param name="filename">Path of the file to open.</param>
 public override void Open(string filename)
 {
     reader = new GzipReader(filename);
     IsOpen = true;
 }
Esempio n. 15
0
        /// <summary>
        /// Intersect bins with the targeted regions defined in callset.Manifest.
        /// The original file is moved to "&lt;name&gt;.raw" and a new file is written at the original location that
        /// contains one line per (bin, manifest region) intersection; bins without any overlap are dropped.
        /// Assumes that the targeted regions don't intersect, the bins are sorted by genomic location and the bins don't intersect.
        /// </summary>
        /// <param name="callset">Callset whose Manifest supplies the targeted regions.</param>
        /// <param name="partitionedPath">Output of CanvasPartition. Bins are assumed to be sorted</param>
        /// <returns><paramref name="partitionedPath"/> (unchanged when the file does not exist).</returns>
        private IFileLocation IntersectBinsWithTargetedRegions(CanvasCallset callset, IFileLocation partitionedPath)
        {
            if (!partitionedPath.Exists) { return partitionedPath; }
            var rawPartitionedPath = partitionedPath.AppendName(".raw");
            if (rawPartitionedPath.Exists) { rawPartitionedPath.Delete(); }
            partitionedPath.MoveTo(rawPartitionedPath);

            Dictionary<string, List<NexteraManifest.ManifestRegion>> manifestRegionsByChrom = callset.Manifest.GetManifestRegionsByChromosome();

            // CanvasPartition output file is in the BED format
            //   start: 0-based, inclusive
            //   end: 0-based, exclusive
            // Manifest
            //   start: 1-based, inclusive
            //   end: 1-based, inclusive
            using (GzipReader reader = new GzipReader(rawPartitionedPath.FullName))
            using (GzipWriter writer = new GzipWriter(partitionedPath.FullName))
            {
                string currentChrom = null;
                List<NexteraManifest.ManifestRegion> regions = null; // regions of currentChrom, null if none
                int manifestRegionIdx = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] toks = line.Split('\t');
                    string chrom = toks[0];
                    int start = int.Parse(toks[1]) + 1; // 1-based, inclusive
                    int end = int.Parse(toks[2]); // 1-based, inclusive
                    if (chrom != currentChrom)
                    {
                        currentChrom = chrom;
                        manifestRegionIdx = 0;
                        if (!manifestRegionsByChrom.TryGetValue(currentChrom, out regions)) { regions = null; }
                    }
                    if (regions == null) { continue; } // no targeted regions on this chromosome

                    // |- manifest region -| |- bin -| : skip regions that end before the bin starts
                    while (manifestRegionIdx < regions.Count && regions[manifestRegionIdx].End < start)
                    {
                        manifestRegionIdx++;
                    }

                    // |- bin -|
                    //       |- manifest region -|
                    // The loop does not run when all regions are exhausted or the next region starts after the bin.
                    while (manifestRegionIdx < regions.Count && end >= regions[manifestRegionIdx].Start)
                    {
                        var region = regions[manifestRegionIdx];

                        // calculate intersection
                        int intersectionStart = Math.Max(start, region.Start); // 1-based, inclusive
                        int intersectionEnd = Math.Min(end, region.End); // 1-based, inclusive
                        // start/end in BED format
                        toks[1] = String.Format("{0}", intersectionStart - 1); // 0-based, inclusive
                        toks[2] = String.Format("{0}", intersectionEnd); // 0-based, exclusive

                        // write intersected bin
                        writer.WriteLine(String.Join("\t", toks));

                        // A region that extends past this bin may also overlap the following bin(s),
                        // so it must stay current; only a region fully covered up to the bin end is consumed.
                        if (region.End > end) { break; }
                        manifestRegionIdx++;
                    }
                }
            }

            return partitionedPath;
        }
Esempio n. 16
0
        /// <summary>
        /// Writes, for every bin, the tumor/normal count ratio scaled by the reference ploidy and by a
        /// library size factor (normal on-target median / tumor on-target median, or 1 if either median is not positive).
        /// The two input files are read in lockstep, so they must list the same bins in the same order.
        /// Bins whose normal count is below 1 are skipped.
        /// </summary>
        /// <param name="tumorBinnedPath">Binned counts of the tumor sample.</param>
        /// <param name="normalBinnedPath">Binned counts of the normal sample; its lines provide the output columns.</param>
        /// <param name="ratioBinnedPath">Output path; column 3 holds the ratio.</param>
        /// <param name="ploidyBedPath">Optional ploidy BED file; null or empty means no reference ploidy is loaded.</param>
        /// <param name="manifest">Optional manifest handed to BinCounts when computing the on-target medians.</param>
        /// <exception cref="System.IO.InvalidDataException">The tumor file has fewer lines than the normal file.</exception>
        private static void GetBinRatio(string tumorBinnedPath, string normalBinnedPath, string ratioBinnedPath,
            string ploidyBedPath, NexteraManifest manifest = null)
        {
            PloidyInfo referencePloidy = String.IsNullOrEmpty(ploidyBedPath) ? null : PloidyInfo.LoadPloidyFromBedFile(ploidyBedPath);
            double tumorMedian = (new BinCounts(tumorBinnedPath, manifest: manifest)).OnTargetMedianBinCount;
            double normalMedian = (new BinCounts(normalBinnedPath, manifest: manifest)).OnTargetMedianBinCount;
            double librarySizeFactor = (tumorMedian > 0 && normalMedian > 0) ? normalMedian / tumorMedian : 1;

            using (GzipReader tumorReader = new GzipReader(tumorBinnedPath))
            using (GzipReader normalReader = new GzipReader(normalBinnedPath))
            using (GzipWriter writer = new GzipWriter(ratioBinnedPath))
            {
                string normalLine;
                while ((normalLine = normalReader.ReadLine()) != null)
                {
                    string tumorLine = tumorReader.ReadLine();
                    if (tumorLine == null)
                    {
                        // Fail with a clear message instead of a NullReferenceException on Split below.
                        throw new System.IO.InvalidDataException(String.Format(
                            "Tumor bin file {0} has fewer lines than normal bin file {1}.", tumorBinnedPath, normalBinnedPath));
                    }
                    string[] normalToks = normalLine.Split('\t');
                    string[] tumorToks = tumorLine.Split('\t');
                    double normalCount = double.Parse(normalToks[3]);
                    double tumorCount = double.Parse(tumorToks[3]);
                    // The weighted average count of a bin could be less than 1.
                    // Using these small counts for coverage normalization creates large ratios.
                    // It would be better to just drop these bins so we don't introduce too much noise into segmentation and CNV calling.
                    if (normalCount < 1) { continue; } // skip the bin
                    string chrom = normalToks[0];
                    int start = int.Parse(normalToks[1]);
                    int end = int.Parse(normalToks[2]);
                    // Scale by the reference ploidy of this interval relative to diploid.
                    double factor = CanvasDiploidBinRatioFactor * GetPloidy(referencePloidy, chrom, start, end) / 2.0;
                    double ratio = tumorCount / normalCount * factor * librarySizeFactor;
                    normalToks[3] = String.Format("{0}", ratio);
                    writer.WriteLine(String.Join("\t", normalToks));
                }
            }
        }
Esempio n. 17
0
		/// <summary>
		///     Opens the vcf file through a GzipReader and then either parses its header or,
		///     when <paramref name="skipHeader"/> is true, registers a single sample named "Sample".
		/// </summary>
		/// <param name="vcfPath">Path of the vcf file.</param>
		/// <param name="skipHeader">True to skip header parsing.</param>
		/// <exception cref="FileNotFoundException">The file at vcfPath does not exist.</exception>
		private void Open(string vcfPath, bool skipHeader)
		{
			// sanity check: make sure the vcf file exists
			bool vcfExists = File.Exists(vcfPath);
			if (!vcfExists)
			{
				string message = string.Format("The specified vcf file ({0}) does not exist.", vcfPath);
				throw new FileNotFoundException(message);
			}

			Reader = new GzipReader(vcfPath);
			IsOpen = true;

			if (!skipHeader)
			{
				ParseHeader();
				return;
			}

			this.Samples.Add("Sample");
		}