GetSegmentsByChromosome() public static method

public static GetSegmentsByChromosome ( List<CanvasSegment> segments ) : Dictionary<string, List<CanvasSegment>>
segments List<CanvasSegment>
return Dictionary<string, List<CanvasSegment>>
Exemplo n.º 1
0
        /// <summary>
        /// Parse the outputs of CanvasSNV, and note these variant frequencies in the appropriate segment.
        /// </summary>
        /// <remarks>
        /// The file is read line by line as tab-separated text. Empty lines and lines starting with '#'
        /// are ignored. Each remaining line must have at least 6 fields: field 0 is the chromosome name,
        /// field 1 the 1-based position, field 4 the reference allele count and field 5 the alternate
        /// allele count. Lines with too few fields or with non-integer position/count fields are reported
        /// on standard error and skipped. Variants with a total depth (ref + alt) below 10, variants on
        /// chromosomes that have no segments, and variants that fall outside every segment are skipped
        /// silently.
        /// </remarks>
        /// <param name="variantFrequencyFile">Path of the variant frequency file; it is opened with GzipReader.</param>
        /// <param name="segments">
        /// Segments to annotate. For each variant that lands in a segment, the alt-allele frequency is
        /// appended to that segment's VariantFrequencies and the total depth to its VariantTotalCoverage.
        /// </param>
        /// <returns>
        /// The mean total depth (ref + alt) over the variants that were assigned to a segment,
        /// or 0 if no variant was assigned.
        /// </returns>
        public static float LoadVariantFrequencies(string variantFrequencyFile, List<CanvasSegment> segments)
        {
            Console.WriteLine("{0} Load variant frequencies from {1}", DateTime.Now, variantFrequencyFile);

            const int MinimumDepth = 10; // variants with fewer supporting reads than this are ignored

            Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(segments);
            Dictionary<string, string> alternativeNames = GetChromosomeAlternativeNames(segmentsByChromosome.Keys);

            // The input is machine-generated, so numbers are parsed independently of the current culture.
            // Fully qualified names are used so that no extra using directive is required.
            System.Globalization.NumberStyles integerStyle = System.Globalization.NumberStyles.Integer;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            long totalCoverage = 0; // summed depth of the variants that were assigned to a segment
            int usableCount = 0;    // number of variants that were assigned to a segment

            using (GzipReader reader = new GzipReader(variantFrequencyFile))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue; // Skip headers
                    }

                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < 6)
                    {
                        Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, fileLine);
                        continue;
                    }

                    // Resolve the chromosome's segment list, falling back to the alternative name table.
                    List<CanvasSegment> chrSegments;
                    if (!segmentsByChromosome.TryGetValue(bits[0], out chrSegments))
                    {
                        string mappedName;
                        if (!alternativeNames.TryGetValue(bits[0], out mappedName))
                        {
                            continue; // No segments on this chromosome
                        }
                        // Presumably every mapped name is a key of segmentsByChromosome (the table was built
                        // from its keys); if it is not, the indexer throws KeyNotFoundException.
                        chrSegments = segmentsByChromosome[mappedName];
                    }

                    int position; // 1-based (from the input VCF to Canvas SNV)
                    int countRef;
                    int countAlt;
                    if (!int.TryParse(bits[1], integerStyle, invariant, out position) ||
                        !int.TryParse(bits[4], integerStyle, invariant, out countRef) ||
                        !int.TryParse(bits[5], integerStyle, invariant, out countAlt))
                    {
                        // Treat unparseable numbers like any other malformed line instead of aborting the load.
                        Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, fileLine);
                        continue;
                    }

                    int depth = countRef + countAlt;
                    if (depth < MinimumDepth)
                    {
                        continue;
                    }
                    float variantFrequency = countAlt / (float)depth;

                    // Binary search for the segment this variant hits.
                    // NOTE(review): assumes each chromosome's segments are sorted by position and do not
                    // overlap - confirm against GetSegmentsByChromosome.
                    int low = 0;
                    int high = chrSegments.Count - 1;
                    while (low <= high)
                    {
                        int mid = low + (high - low) / 2;
                        CanvasSegment candidate = chrSegments[mid];
                        if (candidate.End < position) // CanvasSegment.End is already 1-based
                        {
                            low = mid + 1;
                        }
                        else if (candidate.Begin + 1 > position) // Convert CanvasSegment.Begin to 1-based by adding 1
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            candidate.VariantFrequencies.Add(variantFrequency);
                            candidate.VariantTotalCoverage.Add(depth);
                            totalCoverage += depth; // use only coverage information in segments
                            usableCount++;
                            break;
                        }
                    }
                }
            }

            float meanCoverage = usableCount > 0 ? totalCoverage / (float)usableCount : 0f;
            Console.WriteLine("{0} Loaded a total of {1} usable variant frequencies", DateTime.Now, usableCount);
            return meanCoverage;
        }