// FTDNA files are made available by the Family Tree DNA testing
// service to their customers.

/// <summary>
/// Read data into this instance's genome from an individual's
/// FTDNA personal genome file, using this instance's filename (which
/// may include a path). The specified SnpCollection does not need
/// to contain all the SNPs that may be present in the genome file.
/// The SnpCollection is used as the source of physical and cM
/// positional information. Centimorgan values for SNPs present in
/// the genome file but not in the SnpCollection are extrapolated;
/// such SNPs are skipped when their chromosome number is above 23.
/// Lines that are not treated as data are collected as comments
/// until the first SNP has been added to the genome.
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">Optional (may be null). After every line it is given
/// the current stream position and the total stream length.</param>
/// <exception cref="OperationCanceledException">Cancellation was requested
/// through <paramref name="cancel"/>.</exception>
private void ReadFtdnaGenome(SnpCollection snps, CancellationToken cancel, Progress progress) {
    using (StreamReader reader = new StreamReader(this.filename)) {
        long length = 0;
        if (progress != null) {
            length = reader.BaseStream.Length;
        }
        string[] columns = new string[4];
        string line;
        bool started = false;
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            // A data line is non-empty, does not begin with '#' or '-', and is
            // not the "RSID," header line that precedes the first data line.
            if ((line.Length > 0) && (line[0] != '#') && (line[0] != '-')
                    && (started || !line.StartsWith("RSID,", StringComparison.Ordinal))) {
                started = true;
                // Strip the double quotes before splitting on commas.
                line.Replace("\"", "").FastSplit(',', columns);
                string rsId = columns[0];
                Snp snp = snps[rsId];
                if (snp == null) {
                    // Not in the reference collection: build a Snp from the file's
                    // own chromosome/position and extrapolate its cM value.
                    // NOTE(review): .Value throws if ChromosomeToByte yields no value.
                    byte chr = Snp.ChromosomeToByte(columns[1]).Value;
                    if (chr <= 23) {
                        // Positions are machine-written integers; parse culture-independently.
                        int position = Convert.ToInt32(columns[2], System.Globalization.CultureInfo.InvariantCulture);
                        snp = new Snp(rsId, chr, position, snps.ExtrapolateCentiMorganPosition(chr, position), null, "");
                    }
                }
                // snp stays null for chromosomes above 23. Falling through (rather
                // than using 'continue') keeps the progress update below running
                // for every line that is read.
                if (snp != null) {
                    var alleles = columns[3];
                    // A single allele reported on chromosome 23 is doubled.
                    if ((snp.Chromosome == 23) && (alleles.Length == 1)) {
                        alleles += alleles;
                    }
                    this.genome.Add(snp, Allele.ToAlleles(alleles));
                }
            } else if ((line.Length > 0) && (this.genome.Count == 0)
                    && !line.StartsWith("# rsid\t", StringComparison.Ordinal)) {
                // Non-data lines seen before any SNP was added are kept as comments.
                this.comments.Add(line);
            }
            if (progress != null) {
                progress.Set(reader.BaseStream.Position, length);
            }
        }
    }
}
/// <summary>
/// Rebuilds the centimorgan values of the reference SNP file. The Rutgers
/// map file named by <c>args[1]</c> is read into a SnpCollection, the SNPs
/// from "RefSnps.csv" are updated against it, and the result is written
/// to "RefSnps2.csv".
/// </summary>
/// <remarks>
/// For every reference SNP that is also in the Rutgers data, the chromosome
/// and position are overwritten with the Rutgers values. The cM value is
/// copied from the Rutgers SNP when that value is greater than zero, and is
/// extrapolated from the Rutgers collection otherwise. An earlier variant
/// (previously left here as commented-out code) instead reported SNPs whose
/// chromosome differed or whose position differed by more than 10, and
/// extrapolated the cM for those. A message is written to the console
/// whenever two consecutive SNPs on the same chromosome have decreasing cM.
/// </remarks>
/// <param name="args">Argument list; element 1 is the Rutgers file name.</param>
public static void UpdateCentimorgans(string[] args) {
    SnpCollection snps = new SnpCollection(1, 23);
    SnpFile.ReadRutgers(snps, args[1], System.Threading.CancellationToken.None, null);

    // Add one placeholder SNP at position 0 / cM 0 for each chromosome 1-23.
    // NOTE(review): presumably this anchors extrapolation at the chromosome start - confirm.
    for (int chromosome = 1; chromosome <= 23; chromosome++) {
        snps.Add(new Snp("fake" + chromosome, (byte) chromosome, 0, 0, null, null));
    }

    SnpCollection refSnps = new SnpCollection(1, 23);
    SnpFile.Read(refSnps, "RefSnps.csv");

    Snp previous = null;
    foreach (Snp current in refSnps) {
        Snp rutgersMatch = null;
        if (snps.Contains(current.RsId)) {
            rutgersMatch = snps[current.RsId];
            current.Chromosome = rutgersMatch.Chromosome;
            current.Position = rutgersMatch.Position;
        }

        // Use the Rutgers cM only when there is a match carrying a positive value.
        if ((rutgersMatch == null) || !(rutgersMatch.cM > 0)) {
            current.cM = snps.ExtrapolateCentiMorganPosition(current.Chromosome, current.Position);
        } else {
            current.cM = rutgersMatch.cM;
        }

        bool sameChromosomeAsPrevious = (previous != null) && (previous.Chromosome == current.Chromosome);
        if (sameChromosomeAsPrevious && (previous.cM > current.cM)) {
            Console.WriteLine("cM out of sequence " + previous.RsId + "-" + current.RsId);
        }
        previous = current;
    }

    SnpFile.Write(refSnps, "RefSnps2.csv");
}
// 23AndMe files are made available by the 23AndMe DNA testing service to
// their customers.

/// <summary>
/// Read data into this instance's genome from an individual's
/// 23AndMe personal genome file, using this instance's filename (which
/// may include a path). The specified SnpCollection does not need
/// to contain all the SNPs that may be present in the genome file.
/// The SnpCollection is used as the source of physical and cM
/// positional information. Centimorgan values for SNPs present in
/// the genome file but not in the SnpCollection are extrapolated,
/// provided the line has four columns and the chromosome number is
/// not above 23; other unknown SNPs are skipped.
/// Lines that are not treated as data are collected as comments
/// until the first SNP has been added to the genome. After the read,
/// the genome's test type is set to MeAnd23v2 when fewer than 700000
/// SNPs were loaded and to MeAnd23v3 otherwise.
/// </summary>
/// <param name="snps">The SnpCollection containing reference SNP positional data.</param>
/// <param name="cancel">The CancellationToken that can be used to abort the read.</param>
/// <param name="progress">Optional (may be null). After every line it is given
/// the current stream position and the total stream length.</param>
/// <exception cref="ApplicationException">A data line did not split into
/// more than one tab-separated column.</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested
/// through <paramref name="cancel"/>.</exception>
private void Read23AndMeGenome(SnpCollection snps, CancellationToken cancel, Progress progress) {
    using (StreamReader reader = new StreamReader(this.filename)) {
        long length = 0;
        if (progress != null) {
            length = reader.BaseStream.Length;
        }
        string line;
        string[] columns = new string[4];
        while ((line = reader.ReadLine()) != null) {
            cancel.ThrowIfCancellationRequested();
            // A data line is non-empty and does not begin with '#' or '-'.
            if ((line.Length > 0) && (line[0] != '#') && (line[0] != '-')) {
                int colCount = line.FastSplit('\t', columns);
                if (colCount <= 1) {
                    throw new ApplicationException("Not 23andMe format.");
                }
                string rsId = columns[0];
                Snp snp = snps[rsId];
                if ((snp == null) && (colCount == 4)) {
                    // Not in the reference collection: build a Snp from the file's
                    // own chromosome/position and extrapolate its cM value.
                    // NOTE(review): .Value throws if ChromosomeToByte yields no value.
                    byte chr = Snp.ChromosomeToByte(columns[1]).Value;
                    if (chr <= 23) {
                        // Positions are machine-written integers; parse culture-independently.
                        int position = Convert.ToInt32(columns[2], System.Globalization.CultureInfo.InvariantCulture);
                        snp = new Snp(rsId, chr, position, snps.ExtrapolateCentiMorganPosition(chr, position), null, "");
                    }
                }
                if (snp != null) {
                    // The alleles are taken from the last column that was split out.
                    var alleles = columns[colCount - 1];
                    // A single allele reported on chromosome 23 is doubled.
                    if ((alleles != null) && (snp.Chromosome == 23) && (alleles.Length == 1)) {
                        alleles += alleles;
                    }
                    this.genome.Add(snp, Allele.ToAlleles(alleles));
                }
            } else if ((line.Length > 0) && (this.genome.Count == 0)
                    && !line.StartsWith("# rsid\t", StringComparison.Ordinal)) {
                // Non-data lines seen before any SNP was added are kept as comments,
                // except the "# rsid<TAB>..." column-header line.
                this.comments.Add(line);
            }
            if (progress != null) {
                progress.Set(reader.BaseStream.Position, length);
            }
        }
    }

    // Classify the chip version by the number of SNPs that were loaded.
    if (this.genome.Count < 700000) {
        this.genome.GenomeTestType = Genome.GenomeType.MeAnd23v2;
    } else {
        this.genome.GenomeTestType = Genome.GenomeType.MeAnd23v3;
    }
}