Exemple #1
0
        /// <summary>
        /// Reads SNP items from a tab-delimited text file whose first line is a header.
        /// </summary>
        /// <remarks>
        /// Columns 0-9 of every data line are the fixed SNP fields (name, chromosome, position,
        /// alleles, reversed flag, 1000G alleles and frequency, dataset). From column 10 onwards the
        /// line holds one group of columns per platform: "allele1:allele2", the allele-2 frequency and,
        /// only when the header contains "_AlleleNeedFlip", a boolean flip flag.
        /// </remarks>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>
        /// One <see cref="SNPItem"/> per non-empty data line, in file order. The list is empty when the
        /// file contains no header line.
        /// </returns>
        public static List<SNPItem> ReadFromFile(string fileName)
        {
            var result = new List<SNPItem>();

            using (var sr = new StreamReader(fileName))
            {
                var line = sr.ReadLine();
                if (line == null)
                {
                    // Empty file: ReadLine returned null, so there is no header and no data to parse.
                    return result;
                }

                var hasFlip = line.Contains("_AlleleNeedFlip");

                // Platform names are taken from the header cells via StringBefore("_Allele")
                // (presumably the text preceding that marker). The list is indexed by column number,
                // so platforms[i] is the header cell above data column i.
                var platforms = line.Split('\t').ToList().ConvertAll(m => m.StringBefore("_Allele"));

                // Number of columns occupied by each platform group.
                var step = hasFlip ? 3 : 2;
                while ((line = sr.ReadLine()) != null)
                {
                    if (line.Length == 0)
                    {
                        // Blank lines (e.g. a trailing newline) carry no record; skip them instead of
                        // failing on the missing columns.
                        continue;
                    }

                    var parts = line.Split('\t');

                    // Column layout:
                    // Dbsnp_ID, CHROM, POS, Dbsnp_RefAllele, Dbsnp_AltAllele, Dbsnp_IsReversed,
                    // 1000G_REF, 1000G_ALT, 1000G_MAF, Dataset, then the per-platform groups.
                    var item = new SNPItem()
                    {
                        Name                  = parts[0],
                        Chrom                 = int.Parse(parts[1]),
                        Position              = int.Parse(parts[2]),
                        Allele1               = parts[3][0],
                        Allele2               = parts[4],
                        DbsnpIsReversed       = bool.Parse(parts[5]),
                        G1000Allele1          = parts[6][0],
                        G1000Allele2          = parts[7][0],
                        G1000Allele2Frequency = double.Parse(parts[8]),
                        Dataset               = parts[9]
                    };

                    for (int i = 10; i < parts.Length; i += step)
                    {
                        // An empty first cell means this platform has no data for the SNP.
                        if (string.IsNullOrEmpty(parts[i]))
                        {
                            continue;
                        }

                        var alleles = parts[i].Split(':');
                        var aitem   = new Alleles()
                        {
                            Allele1 = alleles[0],
                            Allele2 = alleles[1]
                        };
                        aitem.Allele2Frequency = double.Parse(parts[i + 1]);
                        if (hasFlip)
                        {
                            aitem.NeedFlip = bool.Parse(parts[i + 2]);
                        }
                        item.Platforms[platforms[i]] = aitem;
                    }

                    result.Add(item);
                }
            }
            return result;
        }
        /// <summary>
        /// Copies the configured input files into the output file and writes a companion
        /// ".info" file listing each emitted marker id and whether it was flagged as imputed.
        /// </summary>
        /// <remarks>
        /// A line whose first field equals "---" is treated as imputed. It is only kept when its
        /// third field matches the position of a target SNP on the file's chromosome, and its leading
        /// fields are then replaced by the SNP's chromosome and marker id. Every other line is copied
        /// unchanged.
        /// </remarks>
        /// <returns>An array containing the output file path.</returns>
        public override IEnumerable<string> Process()
        {
            var snps = SNPItem.ReadFromFile(_options.TargetSnpFile);

            using (var output = new StreamWriter(_options.OutputFile))
            {
                using (var info = new StreamWriter(_options.OutputFile + ".info"))
                {
                    info.WriteLine("MarkerId\tIsImputed");

                    foreach (var file in _options.InputFiles)
                    {
                        int chromosome = DetectChromosome(file);
                        Progress.SetMessage("Chromosome {0} : {1}", chromosome, file);

                        // Target SNPs on this chromosome, keyed by their position rendered as text.
                        var byPosition = snps
                                         .Where(m => m.Chrom == chromosome)
                                         .ToDictionary(m => m.Position.ToString());

                        using (var reader = new StreamReader(file))
                        {
                            SNPItem snp;
                            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                            {
                                // Presumably the first three delimited fields of the line.
                                string[] parts     = line.Take(delimiter, 3);
                                bool     isImputed = parts[0].Equals("---");

                                if (!isImputed)
                                {
                                    // Lines not flagged as imputed pass through untouched.
                                    output.WriteLine(line);
                                    info.WriteLine("{0}\t{1}", parts[1], isImputed);
                                    continue;
                                }

                                if (!byPosition.TryGetValue(parts[2], out snp))
                                {
                                    // Imputed line that is not one of the target SNPs: drop it.
                                    continue;
                                }

                                string name = snp.Name;
                                if (string.IsNullOrEmpty(name))
                                {
                                    name = parts[1];
                                }

                                string markerid = name;
                                if (!string.IsNullOrEmpty(snp.Dataset))
                                {
                                    markerid = snp.Dataset + ":" + name;
                                }

                                output.WriteLine("{0} {1}{2}", snp.Chrom, markerid, line.StringAfter(parts[1]));
                                info.WriteLine("{0}\t{1}", markerid, isImputed);
                            }
                        }
                    }
                }
            }

            return new[] { _options.OutputFile };
        }
        /// <summary>
        /// Fills the 1000 Genomes allele fields of the given SNP items from a VCF file.
        /// </summary>
        /// <remarks>
        /// Items are indexed by <paramref name="keyFunc"/>. For each VCF record whose key matches an
        /// item, G1000Allele1 is set to the first character of the REF column. Each single-character
        /// ALT allele is then tried in order until IsSourceAllelesMatchedWithG1000() returns true, and
        /// the AF value at the same index is stored as G1000Allele2Frequency. When no ALT allele
        /// matches, both alleles are reset to ' ' and the frequency to 0.0.
        /// Numeric fields are parsed with the invariant culture so that the '.' decimal separator of
        /// VCF files is read correctly whatever the current thread culture is.
        /// </remarks>
        /// <param name="snpItems">Items to update in place; keyFunc must yield a distinct key for each.</param>
        /// <param name="g1000VcfFile">Path of the VCF file to read.</param>
        /// <param name="keyFunc">Builds the lookup key from an item's Chrom, Position and Name.</param>
        /// <param name="progress">Progress sink; a ConsoleProgressCallback is created when null.</param>
        private static void DoFillAllele2FrequencyFrom1000Gome(this IEnumerable<SNPItem> snpItems, string g1000VcfFile, Func<SNPItem, string> keyFunc, IProgressCallback progress = null)
        {
            if (progress == null)
            {
                progress = new ConsoleProgressCallback();
            }

            // VCF is a machine-readable format: never parse its numbers with the user's locale.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var dic = snpItems.ToDictionary(m => keyFunc(m));

            progress.SetMessage("Filling MAF from {0} ...", g1000VcfFile);
            using (var sr = new StreamReader(g1000VcfFile))
            {
                progress.SetRange(0, sr.BaseStream.Length);

                // Skip the leading "##" lines. The first line that does not start with "##" is
                // consumed here as well and is not treated as a record (presumably the "#CHROM"
                // column header).
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (!line.StartsWith("##"))
                    {
                        break;
                    }
                }

                int linecount = 0;
                while ((line = sr.ReadLine()) != null)
                {
                    linecount++;

                    // Report progress only every 10000 records.
                    if (linecount % 10000 == 0)
                    {
                        progress.SetPosition(sr.GetCharpos());
                    }

                    var parts = line.Split('\t');

                    // Temporary item used only to compute the lookup key of this record.
                    var snp   = new SNPItem()
                    {
                        Chrom    = HumanChromosomeToInt(parts[0]),
                        Position = int.Parse(parts[1], invariant),
                        Name     = parts[2]
                    };

                    SNPItem loc;
                    if (!dic.TryGetValue(keyFunc(snp), out loc))
                    {
                        continue;
                    }

                    loc.G1000Allele1 = parts[3][0];
                    var  allele2     = parts[4].Split(',');
                    var  frequencies = parts[7].StringAfter("AF=").StringBefore(";").Split(',');
                    bool bFound      = false;
                    for (int i = 0; i < allele2.Length; i++)
                    {
                        // Only single-character ALT alleles are considered.
                        if (allele2[i].Length != 1)
                        {
                            continue;
                        }

                        loc.G1000Allele2 = allele2[i][0];
                        if (loc.IsSourceAllelesMatchedWithG1000())
                        {
                            loc.G1000Allele2Frequency = double.Parse(frequencies[i], invariant);
                            bFound = true;
                            break;
                        }
                    }

                    if (!bFound)
                    {
                        // No ALT allele matched: clear the 1000G fields set above.
                        loc.G1000Allele1          = ' ';
                        loc.G1000Allele2          = ' ';
                        loc.G1000Allele2Frequency = 0.0;
                    }
                }
            }
            progress.SetMessage("Filling MAF finished.");
        }
Exemple #4
0
    /// <summary>
    /// Reads SNP items from a tab-delimited text file whose first line is a header.
    /// </summary>
    /// <remarks>
    /// Columns 0-9 of every data line are the fixed SNP fields (name, chromosome, position,
    /// alleles, reversed flag, 1000G alleles and frequency, dataset). From column 10 onwards the
    /// line holds one group of columns per platform: "allele1:allele2", the allele-2 frequency and,
    /// only when the header contains "_AlleleNeedFlip", a boolean flip flag.
    /// </remarks>
    /// <param name="fileName">Path of the file to read.</param>
    /// <returns>
    /// One <see cref="SNPItem"/> per non-empty data line, in file order. The list is empty when the
    /// file contains no header line.
    /// </returns>
    public static List<SNPItem> ReadFromFile(string fileName)
    {
      var result = new List<SNPItem>();
      using (var sr = new StreamReader(fileName))
      {
        var line = sr.ReadLine();
        if (line == null)
        {
          // Empty file: ReadLine returned null, so there is no header and no data to parse.
          return result;
        }

        var hasFlip = line.Contains("_AlleleNeedFlip");

        // Platform names are taken from the header cells via StringBefore("_Allele")
        // (presumably the text preceding that marker). The list is indexed by column number,
        // so platforms[i] is the header cell above data column i.
        var platforms = line.Split('\t').ToList().ConvertAll(m => m.StringBefore("_Allele"));

        // Number of columns occupied by each platform group.
        var step = hasFlip ? 3 : 2;
        while ((line = sr.ReadLine()) != null)
        {
          if (line.Length == 0)
          {
            // Blank lines (e.g. a trailing newline) carry no record; skip them instead of
            // failing on the missing columns.
            continue;
          }

          var parts = line.Split('\t');

          // Column layout:
          // Dbsnp_ID, CHROM, POS, Dbsnp_RefAllele, Dbsnp_AltAllele, Dbsnp_IsReversed,
          // 1000G_REF, 1000G_ALT, 1000G_MAF, Dataset, then the per-platform groups.
          var item = new SNPItem()
          {
            Name = parts[0],
            Chrom = int.Parse(parts[1]),
            Position = int.Parse(parts[2]),
            Allele1 = parts[3][0],
            Allele2 = parts[4],
            DbsnpIsReversed = bool.Parse(parts[5]),
            G1000Allele1 = parts[6][0],
            G1000Allele2 = parts[7][0],
            G1000Allele2Frequency = double.Parse(parts[8]),
            Dataset = parts[9]
          };

          for (int i = 10; i < parts.Length; i += step)
          {
            // An empty first cell means this platform has no data for the SNP.
            if (string.IsNullOrEmpty(parts[i]))
            {
              continue;
            }

            var alleles = parts[i].Split(':');
            var aitem = new Alleles()
            {
              Allele1 = alleles[0],
              Allele2 = alleles[1]
            };
            aitem.Allele2Frequency = double.Parse(parts[i + 1]);
            if (hasFlip)
            {
              aitem.NeedFlip = bool.Parse(parts[i + 2]);
            }
            item.Platforms[platforms[i]] = aitem;
          }

          result.Add(item);
        }
      }
      return result;
    }
Exemple #5
0
    /// <summary>
    /// Fills the 1000 Genomes allele fields of the given SNP items from a VCF file.
    /// </summary>
    /// <remarks>
    /// Items are indexed by <paramref name="keyFunc"/>. For each VCF record whose key matches an
    /// item, G1000Allele1 is set to the first character of the REF column. Each single-character
    /// ALT allele is then tried in order until IsSourceAllelesMatchedWithG1000() returns true, and
    /// the AF value at the same index is stored as G1000Allele2Frequency. When no ALT allele
    /// matches, both alleles are reset to ' ' and the frequency to 0.0.
    /// Numeric fields are parsed with the invariant culture so that the '.' decimal separator of
    /// VCF files is read correctly whatever the current thread culture is.
    /// </remarks>
    /// <param name="snpItems">Items to update in place; keyFunc must yield a distinct key for each.</param>
    /// <param name="g1000VcfFile">Path of the VCF file to read.</param>
    /// <param name="keyFunc">Builds the lookup key from an item's Chrom, Position and Name.</param>
    /// <param name="progress">Progress sink; a ConsoleProgressCallback is created when null.</param>
    private static void DoFillAllele2FrequencyFrom1000Gome(this IEnumerable<SNPItem> snpItems, string g1000VcfFile, Func<SNPItem, string> keyFunc, IProgressCallback progress = null)
    {
      if (progress == null)
      {
        progress = new ConsoleProgressCallback();
      }

      // VCF is a machine-readable format: never parse its numbers with the user's locale.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;

      var dic = snpItems.ToDictionary(m => keyFunc(m));

      progress.SetMessage("Filling MAF from {0} ...", g1000VcfFile);
      using (var sr = new StreamReader(g1000VcfFile))
      {
        progress.SetRange(0, sr.BaseStream.Length);

        // Skip the leading "##" lines. The first line that does not start with "##" is
        // consumed here as well and is not treated as a record (presumably the "#CHROM"
        // column header).
        string line;
        while ((line = sr.ReadLine()) != null)
        {
          if (!line.StartsWith("##"))
          {
            break;
          }
        }

        int linecount = 0;
        while ((line = sr.ReadLine()) != null)
        {
          linecount++;

          // Report progress only every 10000 records.
          if (linecount % 10000 == 0)
          {
            progress.SetPosition(sr.GetCharpos());
          }

          var parts = line.Split('\t');

          // Temporary item used only to compute the lookup key of this record.
          var snp = new SNPItem()
          {
            Chrom = HumanChromosomeToInt(parts[0]),
            Position = int.Parse(parts[1], invariant),
            Name = parts[2]
          };

          SNPItem loc;
          if (!dic.TryGetValue(keyFunc(snp), out loc))
          {
            continue;
          }

          loc.G1000Allele1 = parts[3][0];
          var allele2 = parts[4].Split(',');
          var frequencies = parts[7].StringAfter("AF=").StringBefore(";").Split(',');
          bool bFound = false;
          for (int i = 0; i < allele2.Length; i++)
          {
            // Only single-character ALT alleles are considered.
            if (allele2[i].Length != 1)
            {
              continue;
            }

            loc.G1000Allele2 = allele2[i][0];
            if (loc.IsSourceAllelesMatchedWithG1000())
            {
              loc.G1000Allele2Frequency = double.Parse(frequencies[i], invariant);
              bFound = true;
              break;
            }
          }

          if (!bFound)
          {
            // No ALT allele matched: clear the 1000G fields set above.
            loc.G1000Allele1 = ' ';
            loc.G1000Allele2 = ' ';
            loc.G1000Allele2Frequency = 0.0;
          }
        }
      }
      progress.SetMessage("Filling MAF finished.");
    }