/// <summary>
/// Loads reference ploidy intervals from a tab-delimited BED file read through <see cref="GzipReader"/>.
/// </summary>
/// <remarks>
/// A line starting with "##ExpectedSexChromosomeKaryotype" is stored (trimmed) in <c>HeaderLine</c>.
/// Empty lines and every other line starting with '#' are skipped.
/// Each remaining line must have at least five tab-separated columns; column 1 is the chromosome,
/// columns 2 and 3 are the interval start and end, and column 5 is the ploidy (column 4 is not read).
/// A one-line summary of what was loaded is written to the console.
/// </remarks>
/// <param name="filePath">Path of the BED file to read.</param>
/// <returns>A <see cref="PloidyInfo"/> whose <c>PloidyByChromosome</c> groups the parsed intervals by chromosome.</returns>
/// <exception cref="FormatException">
/// A data line has fewer than five columns, or one of the numeric columns is not an integer.
/// </exception>
public static PloidyInfo LoadPloidyFromBedFile(string filePath)
{
    // Index 4 (the ploidy) is the highest column accessed below.
    const int RequiredColumnCount = 5;

    // The file is machine-readable, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PloidyInfo ploidy = new PloidyInfo();
    int count = 0;
    using (GzipReader reader = new GzipReader(filePath))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            // Ordinal comparison: this is a fixed token, not linguistic text.
            if (fileLine.StartsWith("##ExpectedSexChromosomeKaryotype", StringComparison.Ordinal))
            {
                ploidy.HeaderLine = fileLine.Trim();
                continue;
            }

            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            if (bits.Length < RequiredColumnCount)
            {
                // Report the offending line instead of failing later with a bare IndexOutOfRangeException.
                throw new FormatException(
                    $"File '{filePath}': expected at least {RequiredColumnCount} tab-separated columns but found {bits.Length} in line '{fileLine}'");
            }

            string chromosome = bits[0];
            if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
            {
                intervals = new List<PloidyInterval>();
                ploidy.PloidyByChromosome[chromosome] = intervals;
            }

            PloidyInterval interval = new PloidyInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.End = int.Parse(bits[2], invariant);
            interval.Ploidy = int.Parse(bits[4], invariant);
            intervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count,
                      ploidy.PloidyByChromosome.Keys.Count);
    return ploidy;
}
/// <summary>
/// Loads reference ploidy intervals from a VCF file, taking the copy number of each record
/// from the genotype column of one sample.
/// </summary>
/// <remarks>
/// When <paramref name="sampleName"/> is null or empty, the first genotype column (index 0) is used.
/// Otherwise the file must list at least two samples and exactly one of them must equal
/// <paramref name="sampleName"/>; that sample's column is used.
/// <c>HeaderLine</c> is set to the reader's header lines joined with a single space.
/// For each record the interval runs from the record's reference position to the value of its
/// "END" info field, and the ploidy is the "CN" genotype value ("." is treated as 2).
/// </remarks>
/// <param name="vcfPath">Path of the VCF file to read.</param>
/// <param name="sampleName">Name of the sample whose genotype column is read; may be null or empty.</param>
/// <returns>A <see cref="PloidyInfo"/> whose <c>PloidyByChromosome</c> groups the parsed intervals by chromosome.</returns>
/// <exception cref="ArgumentException">
/// The sample requirements above are not met, or a record lacks the "END" info field or the
/// "CN" genotype field.
/// </exception>
public static PloidyInfo LoadPloidyFromVcfFile(string vcfPath, string sampleName)
{
    // The file is machine-readable, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int sampleIndex = 0;
    PloidyInfo ploidy = new PloidyInfo();
    using (VcfReader reader = new VcfReader(vcfPath))
    {
        if (!string.IsNullOrEmpty(sampleName))
        {
            if (reader.Samples.Count < 2)
            {
                throw new ArgumentException(
                    $"File '{vcfPath}' must be a multi-sample sample VCF containing > 1 samples");
            }

            // The name must identify exactly one column: neither missing nor duplicated.
            if (reader.Samples.Count(x => x == sampleName) != 1)
            {
                throw new ArgumentException(
                    $"File '{vcfPath}' should contain one genotypes column corresponding to sample {sampleName}");
            }

            sampleIndex = reader.Samples.IndexOf(sampleName);
        }

        ploidy.HeaderLine = string.Join(" ", reader.HeaderLines);

        VcfVariant record;
        while (reader.GetNextVariant(out record))
        {
            string chromosome = record.ReferenceName;
            if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
            {
                intervals = new List<PloidyInterval>();
                ploidy.PloidyByChromosome[chromosome] = intervals;
            }

            // Report a missing END the same way a missing CN is reported, rather than
            // letting the dictionary indexer throw KeyNotFoundException.
            if (!record.InfoFields.TryGetValue("END", out var endValue))
            {
                throw new ArgumentException(
                    $"File '{vcfPath}' must contain an END info field for every record (missing at {chromosome}:{record.ReferencePosition})");
            }

            PloidyInterval interval = new PloidyInterval(chromosome);
            interval.Start = record.ReferencePosition;
            interval.End = int.Parse(endValue, invariant);

            var genotypeColumn = record.GenotypeColumns[sampleIndex];
            if (!genotypeColumn.TryGetValue("CN", out var value))
            {
                throw new ArgumentException($"File '{vcfPath}' must contain one genotype CN column!");
            }

            // A "." copy number falls back to ploidy 2.
            interval.Ploidy = value == "." ? 2 : int.Parse(value, invariant);
            intervals.Add(interval);
        }
    }

    return ploidy;
}
/// <summary>
/// Loads reference ploidy intervals from a tab-delimited BED file read through <see cref="GzipReader"/>.
/// </summary>
/// <remarks>
/// A null or empty <paramref name="filePath"/> yields a freshly constructed <see cref="PloidyInfo"/>
/// without reading anything.
/// Every line starting with "##" is stored (trimmed) in <c>HeaderLine</c>; because each such line
/// overwrites the previous one, only the last one is retained.
/// Empty lines and other lines starting with '#' are skipped.
/// Each remaining line must have at least five tab-separated columns; column 1 is the chromosome,
/// columns 2 and 3 are the interval start and end, and column 5 is the ploidy (column 4 is not read).
/// A one-line summary of what was loaded is written to the console.
/// </remarks>
/// <param name="filePath">Path of the BED file to read; may be null or empty.</param>
/// <returns>A <see cref="PloidyInfo"/> whose <c>PloidyByChromosome</c> groups the parsed intervals by chromosome.</returns>
/// <exception cref="FormatException">
/// A data line has fewer than five columns, or one of the numeric columns is not an integer.
/// </exception>
public static PloidyInfo LoadPloidyFromBedFile(string filePath)
{
    PloidyInfo ploidy = new PloidyInfo();
    if (string.IsNullOrEmpty(filePath))
    {
        return ploidy;
    }

    // Index 4 (the ploidy) is the highest column accessed below.
    const int RequiredColumnCount = 5;

    // The file is machine-readable, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int count = 0;
    using (GzipReader reader = new GzipReader(filePath))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            // Save anything that looks like a VCF header line (we will add it to the output VCF).
            // TODO: support adding multiple header lines to the output VCF.
            if (fileLine.StartsWith("##", StringComparison.Ordinal))
            {
                ploidy.HeaderLine = fileLine.Trim();
                continue;
            }

            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            if (bits.Length < RequiredColumnCount)
            {
                // Report the offending line instead of failing later with a bare IndexOutOfRangeException.
                throw new FormatException(
                    $"File '{filePath}': expected at least {RequiredColumnCount} tab-separated columns but found {bits.Length} in line '{fileLine}'");
            }

            string chromosome = bits[0];
            if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
            {
                intervals = new List<PloidyInterval>();
                ploidy.PloidyByChromosome[chromosome] = intervals;
            }

            PloidyInterval interval = new PloidyInterval(chromosome);
            interval.Start = int.Parse(bits[1], invariant);
            interval.End = int.Parse(bits[2], invariant);
            interval.Ploidy = int.Parse(bits[4], invariant);
            intervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count,
                      ploidy.PloidyByChromosome.Keys.Count);
    return ploidy;
}
/// <summary>
/// Loads reference ploidy intervals from a VCF file, taking the copy number of each record
/// from the genotype column at <paramref name="sampleIndex"/>.
/// </summary>
/// <remarks>
/// For each record the interval runs from the record's reference position to the value of its
/// "END" info field, and the ploidy is the "CN" genotype value ("." is treated as 2).
/// <c>HeaderLine</c> is not set by this method.
/// </remarks>
/// <param name="vcfPath">Path of the VCF file to read.</param>
/// <param name="sampleIndex">Index into each record's genotype columns.</param>
/// <returns>A <see cref="PloidyInfo"/> whose <c>PloidyByChromosome</c> groups the parsed intervals by chromosome.</returns>
/// <exception cref="ArgumentException">
/// A record lacks the "END" info field or the "CN" genotype field.
/// </exception>
private static PloidyInfo LoadPloidyFromVcfFile(string vcfPath, int sampleIndex)
{
    // The file is machine-readable, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PloidyInfo ploidy = new PloidyInfo();
    using (VcfReader reader = new VcfReader(vcfPath))
    {
        // NOTE: the reader's header lines are deliberately not copied into ploidy.HeaderLine.
        // The ploidy VCF header lines first need to be updated to include reference sex
        // chromosome info for one or multiple samples.
        while (reader.GetNextVariant(out var record))
        {
            string chromosome = record.ReferenceName;
            if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
            {
                intervals = new List<PloidyInterval>();
                ploidy.PloidyByChromosome[chromosome] = intervals;
            }

            // Report a missing END the same way a missing CN is reported, rather than
            // letting the dictionary indexer throw KeyNotFoundException.
            if (!record.InfoFields.TryGetValue("END", out var endValue))
            {
                throw new ArgumentException(
                    $"File '{vcfPath}' must contain an END info field for every record (missing at {chromosome}:{record.ReferencePosition})");
            }

            PloidyInterval interval = new PloidyInterval(chromosome)
            {
                Start = record.ReferencePosition,
                End = int.Parse(endValue, invariant)
            };

            var genotypeColumn = record.GenotypeColumns[sampleIndex];
            if (!genotypeColumn.TryGetValue("CN", out var value))
            {
                throw new ArgumentException($"File '{vcfPath}' must contain one genotype CN column!");
            }

            // A "." copy number falls back to ploidy 2.
            interval.Ploidy = value == "." ? 2 : int.Parse(value, invariant);
            intervals.Add(interval);
        }
    }

    return ploidy;
}
/// <summary>
/// Loads reference ploidy intervals from a tab-delimited BED file read through <see cref="GzipReader"/>.
/// </summary>
/// <remarks>
/// A line starting with "##ExpectedSexChromosomeKaryotype" is stored (trimmed) in <c>HeaderLine</c>.
/// Empty lines and every other line starting with '#' are skipped.
/// Each remaining line must have at least five tab-separated columns; column 1 is the chromosome,
/// columns 2 and 3 are the interval start and end, and column 5 is the ploidy (column 4 is not read).
/// A one-line summary of what was loaded is written to the console.
/// </remarks>
/// <param name="filePath">Path of the BED file to read.</param>
/// <returns>A <see cref="PloidyInfo"/> whose <c>PloidyByChromosome</c> groups the parsed intervals by chromosome.</returns>
/// <exception cref="FormatException">
/// A data line has fewer than five columns, or one of the numeric columns is not an integer.
/// </exception>
public static PloidyInfo LoadPloidyFromBedFile(string filePath)
{
    // Index 4 (the ploidy) is the highest column accessed below.
    const int RequiredColumnCount = 5;

    // The file is machine-readable, so numbers are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PloidyInfo ploidy = new PloidyInfo();
    int count = 0;
    using (GzipReader reader = new GzipReader(filePath))
    {
        for (string fileLine = reader.ReadLine(); fileLine != null; fileLine = reader.ReadLine())
        {
            // Ordinal comparison: this is a fixed token, not linguistic text.
            if (fileLine.StartsWith("##ExpectedSexChromosomeKaryotype", StringComparison.Ordinal))
            {
                ploidy.HeaderLine = fileLine.Trim();
                continue;
            }

            bool isBlankOrComment = fileLine.Length == 0 || fileLine[0] == '#';
            if (isBlankOrComment)
            {
                continue;
            }

            string[] bits = fileLine.Split('\t');
            if (bits.Length < RequiredColumnCount)
            {
                // Report the offending line instead of failing later with a bare IndexOutOfRangeException.
                throw new FormatException(
                    $"File '{filePath}': expected at least {RequiredColumnCount} tab-separated columns but found {bits.Length} in line '{fileLine}'");
            }

            string chromosome = bits[0];
            if (!ploidy.PloidyByChromosome.TryGetValue(chromosome, out var intervals))
            {
                intervals = new List<PloidyInterval>();
                ploidy.PloidyByChromosome[chromosome] = intervals;
            }

            PloidyInterval interval = new PloidyInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.End = int.Parse(bits[2], invariant);
            interval.Ploidy = int.Parse(bits[4], invariant);
            intervals.Add(interval);
            count++;
        }
    }

    Console.WriteLine("Reference ploidy: Loaded {0} intervals across {1} chromosomes", count,
                      ploidy.PloidyByChromosome.Keys.Count);
    return ploidy;
}