Пример #1
0
 /// <summary>
 /// Loads reference ploidy intervals from a tab-delimited BED file read through <c>GzipReader</c>.
 /// </summary>
 /// <remarks>
 /// A line starting with <c>##ExpectedSexChromosomeKaryotype</c> is stored (trimmed) in
 /// <c>HeaderLine</c>. Empty lines and every other line starting with <c>#</c> are skipped.
 /// Data lines supply the chromosome name (column 0), start (column 1), end (column 2) and
 /// ploidy (column 4); column 3 is not read. A summary line is written to the console once
 /// the whole file has been read.
 /// </remarks>
 /// <param name="filePath">Path of the BED file to read.</param>
 /// <returns>The parsed intervals grouped by chromosome name, plus the captured header line if one was present.</returns>
 /// <exception cref="FormatException">
 /// A data line has fewer than five columns, or its start, end or ploidy column is not an integer.
 /// </exception>
 public static PloidyInfo LoadPloidyFromBedFile(string filePath)
 {
     const int requiredColumns = 5;
     // The file is machine-readable, so numbers are parsed independently of the current culture.
     System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

     PloidyInfo ploidy = new PloidyInfo();
     int count = 0;
     using (GzipReader reader = new GzipReader(filePath))
     {
         string fileLine;
         while ((fileLine = reader.ReadLine()) != null)
         {
             // Ordinal comparison: the header tag is an identifier, not linguistic text.
             if (fileLine.StartsWith("##ExpectedSexChromosomeKaryotype", StringComparison.Ordinal))
             {
                 ploidy.HeaderLine = fileLine.Trim();
                 continue;
             }
             if (fileLine.Length == 0 || fileLine[0] == '#')
             {
                 continue;
             }

             string[] bits = fileLine.Split('\t');
             if (bits.Length < requiredColumns)
             {
                 // Fail with context instead of an anonymous IndexOutOfRangeException.
                 throw new FormatException(string.Format(
                     invariant,
                     "File '{0}': expected at least {1} tab-separated columns but found {2} in line '{3}'",
                     filePath, requiredColumns, bits.Length, fileLine));
             }

             PloidyInterval interval = new PloidyInterval
             {
                 Start = int.Parse(bits[1], invariant),
                 End = int.Parse(bits[2], invariant),
                 Ploidy = int.Parse(bits[4], invariant)
             };

             // Single lookup; the per-chromosome list is created on first sight of the chromosome.
             string chromosome = bits[0];
             List<PloidyInterval> intervals;
             if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out intervals))
             {
                 intervals = new List<PloidyInterval>();
                 ploidy.PloidyByChromosome[chromosome] = intervals;
             }
             intervals.Add(interval);
             count++;
         }
     }
     Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count, ploidy.PloidyByChromosome.Keys.Count);
     return ploidy;
 }
Пример #2
0
        /// <summary>
        /// Loads reference ploidy intervals for one sample from a ploidy VCF file.
        /// </summary>
        /// <remarks>
        /// When <paramref name="sampleName"/> is null or empty, the first genotype column (index 0) is used.
        /// Otherwise the VCF must list at least two samples and exactly one of them must equal
        /// <paramref name="sampleName"/>; that sample's genotype column is used.
        /// Each record becomes an interval from the record's reference position to its <c>END</c> info
        /// field. The ploidy comes from the <c>CN</c> genotype field, where <c>.</c> is read as 2.
        /// All header lines of the VCF are joined with single spaces into <c>HeaderLine</c>.
        /// </remarks>
        /// <param name="vcfPath">Path of the VCF file to read.</param>
        /// <param name="sampleName">Name of the sample whose genotype column is read; may be null or empty.</param>
        /// <returns>The parsed intervals grouped by chromosome name, plus the joined header lines.</returns>
        /// <exception cref="ArgumentException">
        /// The sample constraints above are violated, or a record's selected genotype column has no <c>CN</c> field.
        /// </exception>
        public static PloidyInfo LoadPloidyFromVcfFile(string vcfPath, string sampleName)
        {
            // The file is machine-readable, so numbers are parsed independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            int        sampleIndex = 0;
            PloidyInfo ploidy      = new PloidyInfo();

            using (VcfReader reader = new VcfReader(vcfPath))
            {
                if (!sampleName.IsNullOrEmpty())
                {
                    if (reader.Samples.Count < 2)
                    {
                        throw new ArgumentException(
                                  $"File '{vcfPath}' must be a multi-sample sample VCF containing > 1 samples");
                    }
                    // The requested sample must appear exactly once among the VCF samples.
                    if (reader.Samples.Count(x => x == sampleName) != 1)
                    {
                        throw new ArgumentException(
                                  $"File '{vcfPath}' should contain one genotypes column corresponding to sample {sampleName}");
                    }
                    sampleIndex = reader.Samples.IndexOf(sampleName);
                }

                ploidy.HeaderLine = string.Join(" ", reader.HeaderLines);

                VcfVariant record;
                while (reader.GetNextVariant(out record))
                {
                    var    genotypeColumn = record.GenotypeColumns[sampleIndex];
                    string copyNumber;
                    if (!genotypeColumn.TryGetValue("CN", out copyNumber))
                    {
                        throw new ArgumentException($"File '{vcfPath}' must contain one genotype CN column!");
                    }

                    string         chromosome = record.ReferenceName;
                    PloidyInterval interval   = new PloidyInterval(chromosome);
                    interval.Start = record.ReferencePosition;
                    interval.End   = int.Parse(record.InfoFields["END"], invariant);
                    // A missing copy number (".") is read as ploidy 2.
                    interval.Ploidy = copyNumber == "." ? 2 : int.Parse(copyNumber, invariant);

                    // Single lookup; the per-chromosome list is created on first sight of the chromosome.
                    List <PloidyInterval> intervals;
                    if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List <PloidyInterval>();
                        ploidy.PloidyByChromosome[chromosome] = intervals;
                    }
                    intervals.Add(interval);
                }
            }
            return(ploidy);
        }
Пример #3
0
        /// <summary>
        /// Loads reference ploidy intervals from a tab-delimited BED file read through <c>GzipReader</c>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="filePath"/> is null or empty, an empty <c>PloidyInfo</c> is returned
        /// without opening any file and without console output.
        /// Any line starting with <c>##</c> is stored (trimmed) in <c>HeaderLine</c>; if several are
        /// present, the last one wins. Empty lines and other lines starting with <c>#</c> are skipped.
        /// Data lines supply the chromosome name (column 0), start (column 1), end (column 2) and
        /// ploidy (column 4); column 3 is not read. A summary line is written to the console once
        /// the whole file has been read.
        /// </remarks>
        /// <param name="filePath">Path of the BED file to read; may be null or empty.</param>
        /// <returns>The parsed intervals grouped by chromosome name, plus the captured header line if one was present.</returns>
        /// <exception cref="FormatException">
        /// A data line has fewer than five columns, or its start, end or ploidy column is not an integer.
        /// </exception>
        public static PloidyInfo LoadPloidyFromBedFile(string filePath)
        {
            const int requiredColumns = 5;
            // The file is machine-readable, so numbers are parsed independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            PloidyInfo ploidy = new PloidyInfo();

            if (string.IsNullOrEmpty(filePath))
            {
                return(ploidy);
            }
            int count = 0;

            using (GzipReader reader = new GzipReader(filePath))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    // save anything that looks like a vcf header line (we will add it to the output vcf)
                    // TODO: support adding multiple header lines to the output vcf
                    if (fileLine.StartsWith("##", StringComparison.Ordinal))
                    {
                        ploidy.HeaderLine = fileLine.Trim();
                        continue;
                    }
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue;
                    }

                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < requiredColumns)
                    {
                        // Fail with context instead of an anonymous IndexOutOfRangeException.
                        throw new FormatException(string.Format(
                            invariant,
                            "File '{0}': expected at least {1} tab-separated columns but found {2} in line '{3}'",
                            filePath, requiredColumns, bits.Length, fileLine));
                    }

                    string         chromosome = bits[0];
                    PloidyInterval interval   = new PloidyInterval(chromosome)
                    {
                        Start  = int.Parse(bits[1], invariant),
                        End    = int.Parse(bits[2], invariant),
                        Ploidy = int.Parse(bits[4], invariant)
                    };

                    // Single lookup; the per-chromosome list is created on first sight of the chromosome.
                    List <PloidyInterval> intervals;
                    if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List <PloidyInterval>();
                        ploidy.PloidyByChromosome[chromosome] = intervals;
                    }
                    intervals.Add(interval);
                    count++;
                }
            }
            Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count, ploidy.PloidyByChromosome.Keys.Count);
            return(ploidy);
        }
Пример #4
0
        /// <summary>
        /// Loads reference ploidy intervals from a ploidy VCF file, reading the genotype column at
        /// <paramref name="sampleIndex"/> of every record.
        /// </summary>
        /// <remarks>
        /// Each record becomes an interval from the record's reference position to its <c>END</c> info
        /// field. The ploidy comes from the <c>CN</c> genotype field, where <c>.</c> is read as 2.
        /// <c>HeaderLine</c> is not populated by this method.
        /// </remarks>
        /// <param name="vcfPath">Path of the VCF file to read.</param>
        /// <param name="sampleIndex">Index into each record's genotype columns selecting the sample to read.</param>
        /// <returns>The parsed intervals grouped by chromosome name.</returns>
        /// <exception cref="ArgumentException">A record's selected genotype column has no <c>CN</c> field.</exception>
        private static PloidyInfo LoadPloidyFromVcfFile(string vcfPath, int sampleIndex)
        {
            // The file is machine-readable, so numbers are parsed independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            PloidyInfo ploidy = new PloidyInfo();

            using (VcfReader reader = new VcfReader(vcfPath))
            {
                // TODO: the ploidy.vcf header lines need to be updated to include reference sex chromosome
                // info for one or multiple samples; until then the reader's header lines are deliberately
                // not copied into ploidy.HeaderLine.

                while (reader.GetNextVariant(out var record))
                {
                    var genotypeColumn = record.GenotypeColumns[sampleIndex];
                    if (!genotypeColumn.TryGetValue("CN", out var copyNumber))
                    {
                        throw new ArgumentException($"File '{vcfPath}' must contain one genotype CN column!");
                    }

                    string         chromosome = record.ReferenceName;
                    PloidyInterval interval   = new PloidyInterval(chromosome)
                    {
                        Start = record.ReferencePosition,
                        End   = int.Parse(record.InfoFields["END"], invariant),
                        // A missing copy number (".") is read as ploidy 2.
                        Ploidy = copyNumber == "." ? 2 : int.Parse(copyNumber, invariant)
                    };

                    // Single lookup; the per-chromosome list is created on first sight of the chromosome.
                    if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
                    {
                        intervals = new List <PloidyInterval>();
                        ploidy.PloidyByChromosome[chromosome] = intervals;
                    }
                    intervals.Add(interval);
                }
            }
            return(ploidy);
        }
Пример #5
0
        /// <summary>
        /// Loads reference ploidy intervals from a tab-delimited BED file read through <c>GzipReader</c>.
        /// </summary>
        /// <remarks>
        /// A line starting with <c>##ExpectedSexChromosomeKaryotype</c> is stored (trimmed) in
        /// <c>HeaderLine</c>. Empty lines and every other line starting with <c>#</c> are skipped.
        /// Data lines supply the chromosome name (column 0), start (column 1), end (column 2) and
        /// ploidy (column 4); column 3 is not read. A summary line is written to the console once
        /// the whole file has been read.
        /// </remarks>
        /// <param name="filePath">Path of the BED file to read.</param>
        /// <returns>The parsed intervals grouped by chromosome name, plus the captured header line if one was present.</returns>
        /// <exception cref="FormatException">
        /// A data line has fewer than five columns, or its start, end or ploidy column is not an integer.
        /// </exception>
        public static PloidyInfo LoadPloidyFromBedFile(string filePath)
        {
            const int requiredColumns = 5;
            // The file is machine-readable, so numbers are parsed independently of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            PloidyInfo ploidy = new PloidyInfo();
            int        count  = 0;

            using (GzipReader reader = new GzipReader(filePath))
            {
                string fileLine;
                while ((fileLine = reader.ReadLine()) != null)
                {
                    // Ordinal comparison: the header tag is an identifier, not linguistic text.
                    if (fileLine.StartsWith("##ExpectedSexChromosomeKaryotype", StringComparison.Ordinal))
                    {
                        ploidy.HeaderLine = fileLine.Trim();
                        continue;
                    }
                    if (fileLine.Length == 0 || fileLine[0] == '#')
                    {
                        continue;
                    }

                    string[] bits = fileLine.Split('\t');
                    if (bits.Length < requiredColumns)
                    {
                        // Fail with context instead of an anonymous IndexOutOfRangeException.
                        throw new FormatException(string.Format(
                            invariant,
                            "File '{0}': expected at least {1} tab-separated columns but found {2} in line '{3}'",
                            filePath, requiredColumns, bits.Length, fileLine));
                    }

                    PloidyInterval interval = new PloidyInterval
                    {
                        Start  = int.Parse(bits[1], invariant),
                        End    = int.Parse(bits[2], invariant),
                        Ploidy = int.Parse(bits[4], invariant)
                    };

                    // Single lookup; the per-chromosome list is created on first sight of the chromosome.
                    string                chromosome = bits[0];
                    List <PloidyInterval> intervals;
                    if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out intervals))
                    {
                        intervals = new List <PloidyInterval>();
                        ploidy.PloidyByChromosome[chromosome] = intervals;
                    }
                    intervals.Add(interval);
                    count++;
                }
            }
            Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count, ploidy.PloidyByChromosome.Keys.Count);
            return(ploidy);
        }