/// <summary>
/// Write all data from the specified SnpCollection into the SnpCollection
/// file having the specified filename (which may include a path). Any
/// existing contents of the file will be overwritten. This overload
/// delegates to the four-argument Write with a token that is never
/// cancelled and no progress reporting.
/// </summary>
/// <param name="snps">The SnpCollection containing the data to be written.</param>
/// <param name="filename">The path and filename of the SnpCollection file.</param>
/// <exception cref="ArgumentNullException">snps or filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">filename is empty or whitespace.</exception>
public static void Write(SnpCollection snps, string filename) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    Write(snps, filename, CancellationToken.None, null);
}
/// <summary>
/// Read this file's genome, auto-detecting the format. When the file
/// extension ends with "csv" the FTDNA reader is tried first and the
/// 23AndMe reader second; for any other extension the order is reversed.
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object updated by the underlying readers; may be null.</param>
/// <exception cref="ArgumentNullException">The filename of this file is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The filename of this file is empty or whitespace.</exception>
/// <exception cref="OperationCanceledException">The read was cancelled.</exception>
/// <exception cref="ApplicationException">Neither reader could read the file;
/// the failure of the last attempt is available as InnerException.</exception>
public void Read(SnpCollection snps, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (this.filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    string ext = Path.GetExtension(filename);
    bool ftdnaFirst = ext.EndsWith("csv", StringComparison.InvariantCultureIgnoreCase);
    var attempts = ftdnaFirst
        ? new[] { Genome.GenomeType.Ftdna, Genome.GenomeType.MeAnd23v2 }
        : new[] { Genome.GenomeType.MeAnd23v2, Genome.GenomeType.Ftdna };

    Exception lastError = null;
    for (int i = 0; i < attempts.Length; i++) {
        // Discard anything a failed earlier attempt left behind. The genome
        // may still be null if that attempt failed before creating it.
        if ((i > 0) && (this.genome != null)) this.genome.Clear();
        try {
            Read(attempts[i], snps, cancel, progress);
            return;
        } catch (OperationCanceledException) {
            // A cancellation is not a format mismatch; do not retry and do
            // not disguise it as an unreadable file.
            throw;
        } catch (Exception ex) {
            // Any other failure is treated as "wrong format"; remember it
            // so the final error can report the cause.
            lastError = ex;
        }
    }
    throw new ApplicationException("Could not read as either format.", lastError);
}
/// <summary>
/// Read data into the specified SnpCollection from the SnpCollection
/// file having the specified filename (which may include a path).
/// Each non-empty line is split on commas. When the fourth column starts
/// with a digit the line is treated as the new format (columns 4 and 5
/// are parsed as position and cM, column 6 is passed as the last Snp
/// argument); otherwise it is treated as the old SnpMap format, with
/// position and cM set to -1 and column 4 passed as the last Snp argument.
/// A chromosome value of 0 is mapped to 23.
/// Numbers are parsed with the invariant culture.
/// </summary>
/// <param name="snps">The SnpCollection to receive the read data.</param>
/// <param name="filename">The path and filename of the SnpCollection file.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
/// <exception cref="ArgumentNullException">snps or filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">filename is empty or whitespace.</exception>
/// <exception cref="OperationCanceledException">The read was cancelled.</exception>
public static void Read(SnpCollection snps, string filename, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    // The file is machine-readable data, so parsing must not depend on the
    // culture of the current thread (e.g. "1.5" read under a decimal-comma locale).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (StreamReader reader = new StreamReader(filename)) {
        long length = 0;
        if (progress != null) length = reader.BaseStream.Length;
        string[] columns = new string[6];
        string line;
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            // Skip blank lines (e.g. a trailing newline) instead of trying to parse them.
            if (line.Length > 0) {
                line.FastSplit(',', columns);
                byte chr = Convert.ToByte(columns[2], invariant);
                if (chr == 0) chr = 23; // handles legacy use of 0 for X
                Snp snp;
                if (!String.IsNullOrWhiteSpace(columns[3]) && Char.IsDigit(columns[3][0])) {
                    // new format snp file
                    snp = new Snp(columns[0], chr, Convert.ToInt32(columns[3], invariant), Convert.ToSingle(columns[4], invariant), columns[1], columns[5]);
                } else {
                    // old SnpMap format snp file
                    snp = new Snp(columns[0], chr, -1, -1, columns[1], columns[3]);
                }
                snps.Add(snp);
            }
            if (progress != null) progress.Set(reader.BaseStream.Position, length);
        }
    }
}
/// <summary>
/// Builds a PhasedGenome from the genome of the given GenomeFile and wraps
/// it in a new PhasedGenomeFile whose name is derived from the given
/// filename via GetPhasedFilename. The new file gets the standard comments
/// plus a history entry recording the time and the SNP counts. Progress
/// messages are written to the console.
/// </summary>
/// <param name="filename">The genome filename from which the phased filename is derived.</param>
/// <param name="gfile">The GenomeFile whose Genome is to be phased.</param>
/// <param name="refSnps">Not used by this method.</param>
/// <returns>The new PhasedGenomeFile holding the phased genome.</returns>
static PhasedGenomeFile SimplePhase(string filename, GenomeFile gfile, SnpCollection refSnps) {
    Console.Write("Creating simple phased genome...");
    PhasedGenome simplePhased = new PhasedGenome(gfile.Genome);
    Console.WriteLine("completed");

    PhasedGenomeFile output = new PhasedGenomeFile(GetPhasedFilename(filename));
    output.SetStandardComments();
    output.AddComment("## history");

    // Assemble the history entry from its parts, in the order they appear in the text.
    string timestamp = DateTime.Now.ToString();
    string phasedCount = simplePhased.Count.ToString("#,##0");
    string totalCount = gfile.Genome.Count.ToString("#,##0");
    output.AddComment("## " + timestamp + " SimplePhase of " + phasedCount + " homozygous SNPs (of " + totalCount + " total).");

    output.PhasedGenome = simplePhased;
    return output;
}
/// <summary>
/// Read this file's genome using the reader for the specified genome type.
/// Existing comments are cleared, and the genome is cleared (or created
/// for chromosomes 1-23 if it does not exist yet) before reading. The
/// FTDNA reader is used for GenomeType.Ftdna; every other type is read
/// with the 23AndMe reader. If the genome has no name after the read, it
/// is named after the file (without path and extension).
/// </summary>
/// <param name="genomeType">The format in which the file is to be read.</param>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object passed on to the reader; may be null.</param>
/// <exception cref="ArgumentNullException">snps is null, or the filename of this file is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The filename of this file is empty or whitespace.</exception>
public void Read(Genome.GenomeType genomeType, SnpCollection snps, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (this.filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    this.comments.Clear();
    if (this.genome == null) {
        this.genome = new Genome(1, 23);
    } else {
        this.genome.Clear();
    }

    if (genomeType == Dna.Genome.GenomeType.Ftdna) {
        ReadFtdnaGenome(snps, cancel, progress);
    } else {
        Read23AndMeGenome(snps, cancel, progress);
    }

    if (String.IsNullOrWhiteSpace(this.genome.Name)) this.genome.Name = Path.GetFileNameWithoutExtension(this.filename);
}
/// <summary>
/// Read this file's phased genome. The genome is cleared (or created for
/// chromosomes 1-23 if it does not exist yet) and existing comments are
/// cleared before reading. Data lines are tab-separated and must have
/// either 2 or 4 columns; the first column is the rsId and the last column
/// holds the two phased alleles. Lines whose rsId is not found in the
/// specified SnpCollection are ignored. Lines starting with '#' that
/// appear before any SNP has been added, and that differ from the header
/// line, are kept as comments. If the genome has no name after the read,
/// it is named after the file (without path and extension).
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
/// <exception cref="ArgumentNullException">snps is null, or the filename of this file is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The filename of this file is empty or whitespace.</exception>
/// <exception cref="ApplicationException">A data line does not have 2 or 4 columns,
/// or its allele column holds fewer than two characters.</exception>
public void Read(SnpCollection snps, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (this.filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    if (this.genome == null) {
        this.genome = new PhasedGenome(1, 23);
    } else {
        this.genome.Clear();
    }
    this.comments.Clear();

    using (StreamReader reader = new StreamReader(this.filename)) {
        long length = 0;
        if (progress != null) length = reader.BaseStream.Length;
        string line;
        string[] columns = new string[4];
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            if ((line.Length > 0) && (line[0] != '#')) {
                int colCount = line.FastSplit('\t', columns);
                if ((colCount != 2) && (colCount != 4)) throw new ApplicationException("Not phased genome format.");
                string rsId = columns[0];
                Snp snp = snps[rsId];
                if (snp != null) {
                    var alleles = columns[colCount - 1];
                    // Both phased alleles are required; report a malformed line as a
                    // format error instead of failing with an index or null error.
                    if ((alleles == null) || (alleles.Length < 2)) throw new ApplicationException("Not phased genome format.");
                    var phased = new PhasedGenome.Phasing(alleles[0].ToAllele(), alleles[1].ToAllele());
                    this.genome.Add(snp, phased);
                }
            } else if ((line.Length > 0) && (genome.Count == 0) && (line != header)) {
                // A '#' line before the first SNP that is not the header line.
                this.comments.Add(line);
            }
            if (progress != null) progress.Set(reader.BaseStream.Position, length);
        }
    }

    if (String.IsNullOrWhiteSpace(genome.Name)) this.genome.Name = Path.GetFileNameWithoutExtension(filename);
}
/// <summary>
/// Creates a GenomeFile for the given filename, gives it an empty Genome
/// for chromosomes 1-23 and reads the file into it, reporting start and
/// completion on the console. The read cannot be cancelled and reports
/// no progress.
/// </summary>
/// <param name="refSnps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="filename">The path and filename of the genome file.</param>
/// <returns>The GenomeFile that was read.</returns>
public static GenomeFile ReadGenome(SnpCollection refSnps, string filename) {
    var genomeFile = new GenomeFile(filename) {
        Genome = new Genome(1, 23)
    };

    Console.Write("Reading genome " + genomeFile.Name + "...");
    genomeFile.Read(refSnps, System.Threading.CancellationToken.None, null);
    Console.WriteLine("completed");

    return genomeFile;
}
/// <summary>
/// Reads the reference SNPs from the file "RefSnps.csv" (a relative path)
/// into a new SnpCollection for chromosomes 1-23, reporting start and
/// completion on the console.
/// </summary>
/// <returns>The SnpCollection holding the reference SNPs.</returns>
public static SnpCollection ReadRefSnps() {
    const string refSnpsFilename = "RefSnps.csv";

    var collection = new SnpCollection(1, 23);
    Console.Write("Reading reference SNPs...");
    SnpFile.Read(collection, refSnpsFilename);
    Console.WriteLine("completed");
    return collection;
}
/// <summary>
/// Updates the chromosome, position and cM values of the reference SNPs in
/// "RefSnps.csv" from a Rutgers SNP map file and writes the result to
/// "RefSnps2.csv". For a reference SNP found in the Rutgers map, the
/// chromosome and position are taken from the map, and so is the cM value
/// when it is greater than 0. In all other cases the cM value is
/// extrapolated from the map. A console message is written whenever two
/// consecutive reference SNPs on the same chromosome have decreasing cM.
/// </summary>
/// <param name="args">The command arguments; args[1] is the path and
/// filename of the Rutgers SNP map file.</param>
/// <exception cref="ArgumentNullException">args is null.</exception>
/// <exception cref="ArgumentException">args has fewer than two elements.</exception>
public static void UpdateCentimorgans(string[] args) {
    // Fail with a clear message instead of an index or null error on args[1].
    if (args == null) throw new ArgumentNullException("args");
    if (args.Length < 2) throw new ArgumentException("The Rutgers map filename is expected in args[1].", "args");

    SnpCollection snps = new SnpCollection(1, 23);
    string filename = args[1];
    SnpFile.ReadRutgers(snps, filename, new System.Threading.CancellationToken(), null);

    // Add one placeholder SNP at position 0 / cM 0 to every chromosome.
    // NOTE(review): presumably an origin point for ExtrapolateCentiMorganPosition — confirm.
    for (int i = 1; i <= 23; i++) snps.Add(new Snp("fake" + i, (byte) i, 0, 0, null, null));

    SnpCollection refSnps = new SnpCollection(1, 23);
    SnpFile.Read(refSnps, "RefSnps.csv");

    Snp prevSnp = null;
    foreach (Snp snp in refSnps) {
        Snp rutgerSnp = null;
        if (snps.Contains(snp.RsId)) {
            // The Rutgers map is authoritative for chromosome and position.
            rutgerSnp = snps[snp.RsId];
            snp.Chromosome = rutgerSnp.Chromosome;
            snp.Position = rutgerSnp.Position;
        }

        if ((rutgerSnp != null) && (rutgerSnp.cM > 0)) {
            snp.cM = rutgerSnp.cM;
        } else {
            snp.cM = snps.ExtrapolateCentiMorganPosition(snp.Chromosome, snp.Position);
        }

        if ((prevSnp != null) && (prevSnp.Chromosome == snp.Chromosome) && (prevSnp.cM > snp.cM)) {
            Console.WriteLine("cM out of sequence " + prevSnp.RsId + "-" + snp.RsId);
        }
        prevSnp = snp;
    }

    SnpFile.Write(refSnps, "RefSnps2.csv");
}
// FTDNA files are made available by the Family Tree DNA testing
// service to their customers.
/// <summary>
/// Read data into the genome of this file from an individual's FTDNA
/// personal genome file, using the filename of this file. The specified
/// SnpCollection does not need to contain all the SNPs that may be
/// present in the genome file. The SnpCollection is used as the source of
/// physical and cM positional information. Centimorgan values for SNPs
/// present in the genome but not in the SnpCollection are extrapolated;
/// such SNPs are skipped when their chromosome number is above 23.
/// Lines starting with '#' or '-', and a leading line starting with
/// "RSID,", are not treated as data; those that appear before the first
/// SNP has been added are kept as comments unless they start with "# rsid" + tab.
/// A single allele on chromosome 23 is doubled.
/// The specified CancellationToken can be used to abort the read.
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
private void ReadFtdnaGenome(SnpCollection snps, CancellationToken cancel, Progress progress) {
    using (StreamReader reader = new StreamReader(this.filename)) {
        long length = 0;
        if (progress != null) length = reader.BaseStream.Length;
        string[] columns = new string[4];
        string line;
        bool started = false; // becomes true once the first data line has been seen
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            if ((line.Length > 0) && (line[0] != '#') && (line[0] != '-') && (started || !line.StartsWith("RSID,", StringComparison.Ordinal))) {
                started = true;
                // FTDNA quotes its values; strip the quotes before splitting.
                line.Replace("\"", "").FastSplit(',', columns);
                string rsId = columns[0];
                Snp snp = snps[rsId];
                if (snp == null) {
                    // Not a reference SNP: build one from the file's own position data.
                    // An unrecognized chromosome makes .Value throw, which callers
                    // that probe the file format rely on; that is left unchanged.
                    byte chr = Snp.ChromosomeToByte(columns[1]).Value;
                    if (chr <= 23) {
                        int position = Convert.ToInt32(columns[2]);
                        snp = new Snp(rsId, chr, position, snps.ExtrapolateCentiMorganPosition(chr, position), null, "");
                    }
                }
                // snp stays null only for an unknown SNP on a chromosome above 23.
                // Such a line is skipped, but still falls through to the progress
                // update below (a 'continue' here used to bypass it).
                if (snp != null) {
                    var alleles = columns[3];
                    if ((snp.Chromosome == 23) && (alleles.Length == 1)) alleles += alleles;
                    this.genome.Add(snp, Allele.ToAlleles(alleles));
                }
            } else if ((line.Length > 0) && (this.genome.Count == 0) && !line.StartsWith("# rsid\t", StringComparison.Ordinal)) {
                this.comments.Add(line);
            }
            if (progress != null) progress.Set(reader.BaseStream.Position, length);
        }
    }
}
/// <summary>
/// Makes the specified PhasedGenome the phased genome of this file and
/// then reads the file via the Read overload that takes no genome.
/// </summary>
/// <param name="genome">The PhasedGenome to assign to this file before reading.</param>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object passed on to the read; may be null.</param>
public void Read(PhasedGenome genome, SnpCollection snps, CancellationToken cancel, Progress progress) {
    // Assign the target first; the overload below works on this file's own genome.
    this.PhasedGenome = genome;

    this.Read(snps, cancel, progress);
}
/// <summary>
/// Read match weights from the tab-separated file having the specified
/// filename (which may include a path). For every non-empty line, the
/// first column is the rsId, the third column the major weight and the
/// fifth column the minor weight. Lines whose rsId is not contained in the
/// specified SnpCollection are ignored. Each weight is multiplied by 10
/// and converted to an integer. A weight that cannot be parsed is taken
/// as 0. Weights are parsed with the invariant culture.
/// </summary>
/// <param name="matchWeights">The SnpMap to receive the match weights.</param>
/// <param name="snps">The SnpCollection used to look up each SNP by rsId.</param>
/// <param name="filename">The path and filename of the match weights file.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
/// <exception cref="ArgumentNullException">matchWeights, snps or filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">filename is empty or whitespace.</exception>
public static void ReadMatchWeights(SnpMap<MatchWeight> matchWeights, SnpCollection snps, string filename, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (matchWeights == null) throw new ArgumentNullException("matchWeights", "The MatchWeights cannot be null.");
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    // Same number styles double.TryParse(string, out double) uses, but with the
    // invariant culture so the result does not depend on the thread's locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles weightStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    using (StreamReader reader = new StreamReader(filename)) {
        long length = 0;
        if (progress != null) length = reader.BaseStream.Length;
        string line;
        string[] columns = new string[5];
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            if (line.Length > 0) {
                line.FastSplit('\t', columns);
                string rsId = columns[0];
                // TryParse leaves the weight at 0 when the column is not a number;
                // that best-effort behavior is intentional and kept.
                double majorWeight, minorWeight;
                double.TryParse(columns[2], weightStyle, invariant, out majorWeight);
                double.TryParse(columns[4], weightStyle, invariant, out minorWeight);
                if (snps.Contains(rsId)) {
                    Snp snp = snps[rsId];
                    MatchWeight matchWeight = new MatchWeight(Convert.ToInt32(10*majorWeight), Convert.ToInt32(10*minorWeight));
                    matchWeights.Add(snp, matchWeight);
                }
            }
            if (progress != null) progress.Set(reader.BaseStream.Position, length);
        }
    }
}
/// <summary>
/// Read data into the specified SnpCollection from the Rutgers SNP
/// map file having the specified filename (which may include a path).
/// The first line is skipped as a header. Every other line is split on
/// commas into rsId, chromosome, position and cM. Lines whose chromosome
/// is not recognized or is outside 1-23 are ignored. When the cM column
/// is not a number the SNP is created without a cM value.
/// Numbers are parsed with the invariant culture.
/// The specified CancellationToken can be used to abort the read.
/// </summary>
/// <remarks>See http://compgen.rutgers.edu/RutgersMap/AffymetrixIllumina.aspx </remarks>
/// <param name="snps">The SnpCollection to receive the read data.</param>
/// <param name="filename">The path and filename of the Rutgers SNP map file.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
/// <exception cref="ArgumentNullException">snps or filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">filename is empty or whitespace.</exception>
public static void ReadRutgers(SnpCollection snps, string filename, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    // Same number styles float.TryParse(string, out float) uses, but with the
    // invariant culture so the result does not depend on the thread's locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles cmStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    using (StreamReader reader = new StreamReader(filename)) {
        long length = 0;
        if (progress != null) length = reader.BaseStream.Length;
        string[] columns = new string[4];
        string line;
        reader.ReadLine(); // skip header
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            line.FastSplit(',', columns);
            byte? chr = Snp.ChromosomeToByte(columns[1]);
            if (chr.HasValue && (chr.Value >= 1) && (chr.Value <= 23)) {
                int position = Convert.ToInt32(columns[2], invariant);
                float cM;
                Snp snp;
                if (float.TryParse(columns[3], cmStyle, invariant, out cM)) {
                    snp = new Snp(columns[0], chr.Value, position, cM, null, null);
                } else {
                    // No usable cM value in the map for this SNP.
                    snp = new Snp(columns[0], chr.Value, position);
                }
                snps.Add(snp);
            }
            if (progress != null) progress.Set(reader.BaseStream.Position, length);
        }
    }
}
/// <summary>
/// Write all data from the specified SnpCollection into the SnpCollection
/// file having the specified filename (which may include a path). Any
/// existing contents of the file will be overwritten. One comma-separated
/// line is written per SNP: rsId, Alfred id, chromosome, position, cM
/// (at most six decimals) and alleles. Numbers are written with the
/// invariant culture so that the decimal separator can never be a comma.
/// The specified CancellationToken can be used to abort the write.
/// </summary>
/// <param name="snps">The SnpCollection containing the data to be written.</param>
/// <param name="filename">The path and filename of the SnpCollection file.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the write.</param>
/// <param name="progress">The progress object, updated after every SNP with the
/// number of SNPs written so far and the total number of SNPs; may be null.</param>
/// <exception cref="ArgumentNullException">snps or filename is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">filename is empty or whitespace.</exception>
/// <exception cref="OperationCanceledException">The write was cancelled.</exception>
public static void Write(SnpCollection snps, string filename, CancellationToken cancel, Progress progress) {
    // The single-string constructors of these exception types take the
    // parameter NAME, not the message, so both are passed explicitly.
    if (snps == null) throw new ArgumentNullException("snps", "The SnpCollection cannot be null.");
    if (filename == null) throw new ArgumentNullException("filename", "filename cannot be null.");
    if (String.IsNullOrWhiteSpace(filename)) throw new ArgumentOutOfRangeException("filename", "filename cannot be empty.");

    // With the current culture, a decimal-comma locale would write e.g. "1,5"
    // for cM and thereby add a column to the comma-separated line.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (StreamWriter writer = new StreamWriter(filename)) {
        int count = 0;
        foreach (var snp in snps) {
            string rsid = snp.RsId;
            cancel.ThrowIfCancellationRequested();
            writer.WriteLine(rsid + "," + snp.AlfredId + "," + Snp.ChromosomeToString(snp.Chromosome) + ","
                + snp.Position.ToString(invariant) + "," + snp.cM.ToString("0.######", invariant) + ","
                + snp.Alleles.ToAllelesString());
            count++;
            if (progress != null) progress.Set(count, snps.Count);
        }
    }
}
/// <summary>
/// Reads the phased genome file that belongs to the given genome filename.
/// The phased filename is derived via GetPhasedFilename. When that file
/// does not exist, nothing is read. Otherwise a PhasedGenomeFile with an
/// empty PhasedGenome for chromosomes 1-23 is created and read, reporting
/// start and completion on the console. The read cannot be cancelled and
/// reports no progress.
/// </summary>
/// <param name="refSnps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="filename">The genome filename from which the phased filename is derived.</param>
/// <returns>The PhasedGenomeFile that was read, or null when the phased file does not exist.</returns>
public static PhasedGenomeFile ReadPhasedGenome(SnpCollection refSnps, string filename) {
    string phasedPath = GetPhasedFilename(filename);
    if (File.Exists(phasedPath)) {
        var phasedFile = new PhasedGenomeFile(phasedPath) {
            PhasedGenome = new PhasedGenome(1, 23)
        };

        Console.Write("Reading phased genome " + phasedFile.Name + "...");
        phasedFile.Read(refSnps, System.Threading.CancellationToken.None, null);
        Console.WriteLine("completed");

        return phasedFile;
    }
    return null;
}
// 23AndMe files are made available by the 23AndMe DNA testing service to
// their customers.
/// <summary>
/// Read data into the genome of this file from an individual's 23AndMe
/// personal genome file, using the filename of this file. Data lines are
/// tab-separated; the first column is the rsId and the last column holds
/// the alleles. SNPs found in the specified SnpCollection are taken from
/// there. For SNPs not found, a new Snp with an extrapolated cM value is
/// created, but only when the line has 4 columns and the chromosome number
/// is at most 23; otherwise the line is ignored. A single allele on
/// chromosome 23 is doubled. Lines starting with '#' or '-' are not data;
/// those appearing before the first SNP has been added are kept as
/// comments unless they start with "# rsid" + tab. After the read, the
/// genome test type is set to MeAnd23v2 when fewer than 700000 SNPs were
/// read and to MeAnd23v3 otherwise.
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">The progress object, updated after every line with the
/// current stream position and the total stream length; may be null.</param>
private void Read23AndMeGenome(SnpCollection snps, CancellationToken cancel, Progress progress) {
    // Genomes with at least this many SNPs are classified as v3.
    const int v3MinimumSnpCount = 700000;

    using (StreamReader input = new StreamReader(this.filename)) {
        long totalBytes = (progress != null) ? input.BaseStream.Length : 0;
        string[] fields = new string[4];
        string text;
        while ((text = input.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();

            bool hasText = text.Length > 0;
            bool isDataLine = hasText && (text[0] != '#') && (text[0] != '-');
            if (isDataLine) {
                int fieldCount = text.FastSplit('\t', fields);
                if (fieldCount <= 1) throw new ApplicationException("Not 23andMe format.");

                string rsId = fields[0];
                Snp snp = snps[rsId];
                if ((snp == null) && (fieldCount == 4)) {
                    // Not a reference SNP: build one from the file's own position data.
                    byte chr = Snp.ChromosomeToByte(fields[1]).Value;
                    if (chr <= 23) {
                        int position = Convert.ToInt32(fields[2]);
                        snp = new Snp(rsId, chr, position, snps.ExtrapolateCentiMorganPosition(chr, position), null, "");
                    }
                }

                if (snp != null) {
                    string alleles = fields[fieldCount - 1];
                    bool singleAlleleOn23 = (alleles != null) && (snp.Chromosome == 23) && (alleles.Length == 1);
                    if (singleAlleleOn23) alleles += alleles;
                    this.genome.Add(snp, Allele.ToAlleles(alleles));
                }
            } else if (hasText && (genome.Count == 0) && !text.StartsWith("# rsid\t")) {
                this.comments.Add(text);
            }

            if (progress != null) progress.Set(input.BaseStream.Position, totalBytes);
        }
    }

    this.genome.GenomeTestType = (this.genome.Count < v3MinimumSnpCount)
        ? Genome.GenomeType.MeAnd23v2
        : Genome.GenomeType.MeAnd23v3;
}
/// <summary>
/// Gets an SnpCollection of the SNPs contained in this Genome. The
/// collection is built on the first call, while holding lockObj, from the
/// keys of the genotypes map and is cached; later calls return the cached
/// collection. Its chromosome range is the smallest and largest
/// chromosome number found among the SNPs.
/// </summary>
/// <returns>An SnpCollection.</returns>
protected SnpCollection GetSnps() {
    lock (lockObj) {
        // Already built by an earlier call.
        if (this.snps != null) return this.snps;

        var members = this.genotypes.Keys.ToArray<Snp>();

        // Determine the chromosome range covered by the SNPs.
        // NOTE(review): with no SNPs the range stays at (ushort.MaxValue, ushort.MinValue);
        // confirm that SnpCollection accepts such a range.
        ushort lowestChr = ushort.MaxValue;
        ushort highestChr = ushort.MinValue;
        foreach (var member in members) {
            var chr = member.Chromosome;
            if (chr < lowestChr) lowestChr = chr;
            if (chr > highestChr) highestChr = chr;
        }

        this.snps = new SnpCollection(lowestChr, highestChr);
        foreach (var member in members) {
            this.snps.Add(member);
        }
        return this.snps;
    }
}