コード例 #1
0
ファイル: SNPItemUtils.cs プロジェクト: shengqh/CQS.Core
    /// <summary>
    /// Fill the reference allele of each SNP from a genome fasta file.
    /// </summary>
    /// <remarks>
    /// The SNPs are grouped by <c>Chrom</c> and the fasta file is then read one sequence at a time.
    /// Each sequence name is converted with <c>HumanChromosomeToInt</c>; every SNP grouped under that
    /// chromosome gets <c>RefChar</c> set to the upper-cased base found at its 1-based <c>Position</c>.
    /// SNPs whose chromosome never appears in the fasta file are left untouched.
    /// No bounds check is performed: a <c>Position</c> outside the sequence fails at the indexer.
    /// </remarks>
    /// <param name="snpItems">SNPs to update in place.</param>
    /// <param name="fastaFile">Path of the genome fasta file.</param>
    /// <param name="progress">Receives status messages; a <c>ConsoleProgressCallback</c> is created when null.</param>
    public static void FillReferenceAlleleFromFasta(this IEnumerable<SNPItem> snpItems, string fastaFile, IProgressCallback progress = null)
    {
      if (progress == null)
      {
        progress = new ConsoleProgressCallback();
      }

      var dic = snpItems.ToGroupDictionary(m => m.Chrom);

      progress.SetMessage("Filling reference allele from {0} file ...", fastaFile);
      using (var sw = new StreamReader(fastaFile))
      {
        var ff = new FastaFormat();
        Sequence seq;
        while ((seq = ff.ReadSequence(sw)) != null)
        {
          progress.SetMessage("chromosome " + seq.Name + " ...");
          var chr = HumanChromosomeToInt(seq.Name);
          if (dic.ContainsKey(chr))
          {
            var snps = dic[chr];
            foreach (var snp in snps)
            {
              // Position is 1-based, the sequence is 0-based. Sequence data is not linguistic text,
              // so the base is upper-cased independent of the current culture.
              snp.RefChar = char.ToUpperInvariant(seq.SeqString[snp.Position - 1]);
            }
          }
        }
      }
      progress.SetMessage("Filling reference allele finished.");
    }
コード例 #2
0
ファイル: SNPItemUtils.cs プロジェクト: shengqh/CQS.Core
    /// <summary>
    /// Fill the 1000 Genomes alleles and the allele 2 frequency of each SNP from a VCF file.
    /// </summary>
    /// <remarks>
    /// The SNPs are indexed by <paramref name="keyFunc"/>. For every data line of the VCF file a
    /// temporary <c>SNPItem</c> is built from the first three columns (chromosome, position, name) and
    /// looked up by the same key; lines without a matching SNP are ignored. For a matching SNP,
    /// <c>G1000Allele1</c> is set to the first character of column 4, and the single-character
    /// alternate alleles of column 5 are tried in order until <c>IsSourceAllelesMatchedWithG1000</c>
    /// returns true; the frequency at the same index of the <c>AF=</c> entry in column 8 is then stored.
    /// When no alternate allele matches, both alleles are reset to ' ' and the frequency to 0.0.
    /// Numbers are parsed with the invariant culture and empty lines are skipped.
    /// </remarks>
    /// <param name="snpItems">SNPs to update in place; <paramref name="keyFunc"/> must give each a unique key.</param>
    /// <param name="g1000VcfFile">Path of the 1000 Genomes VCF file.</param>
    /// <param name="keyFunc">Builds the lookup key of a SNP; it is also applied to the SNP parsed from each VCF line.</param>
    /// <param name="progress">Receives status messages and position; a <c>ConsoleProgressCallback</c> is created when null.</param>
    private static void DoFillAllele2FrequencyFrom1000Gome(this IEnumerable<SNPItem> snpItems, string g1000VcfFile, Func<SNPItem, string> keyFunc, IProgressCallback progress = null)
    {
      if (progress == null)
      {
        progress = new ConsoleProgressCallback();
      }

      var dic = snpItems.ToDictionary(m => keyFunc(m));

      // VCF is machine-readable: parse numbers independent of the current culture, otherwise a
      // frequency such as "0.0123" is misread on locales that use ',' as the decimal separator.
      var invariant = System.Globalization.CultureInfo.InvariantCulture;

      progress.SetMessage("Filling MAF from {0} ...", g1000VcfFile);
      using (var sr = new StreamReader(g1000VcfFile))
      {
        progress.SetRange(0, sr.BaseStream.Length);

        string line;
        // Skip the "##" meta lines. The first line that does not start with "##" is consumed too
        // (NOTE(review): presumably the "#CHROM" column header - confirm against the input files).
        while ((line = sr.ReadLine()) != null)
        {
          if (!line.StartsWith("##", StringComparison.Ordinal))
          {
            break;
          }
        }

        int linecount = 0;
        while ((line = sr.ReadLine()) != null)
        {
          linecount++;

          // Report the position only every 10000 lines.
          if (linecount % 10000 == 0)
          {
            progress.SetPosition(sr.GetCharpos());
          }

          // An empty line (e.g. a trailing blank line) has no columns to parse.
          if (line.Length == 0)
          {
            continue;
          }

          var parts = line.Split('\t');
          var snp = new SNPItem()
          {
            Chrom = HumanChromosomeToInt(parts[0]),
            Position = int.Parse(parts[1], invariant),
            Name = parts[2]
          };

          SNPItem loc;
          if (!dic.TryGetValue(keyFunc(snp), out loc))
          {
            continue;
          }

          loc.G1000Allele1 = parts[3][0];
          var allele2 = parts[4].Split(',');
          // NOTE(review): presumably the text between "AF=" and the next ';' - confirm StringAfter/StringBefore.
          var frequencies = parts[7].StringAfter("AF=").StringBefore(";").Split(',');
          bool bFound = false;
          for (int i = 0; i < allele2.Length; i++)
          {
            // Only single-character alternate alleles are considered.
            if (allele2[i].Length != 1)
            {
              continue;
            }

            loc.G1000Allele2 = allele2[i][0];
            if (loc.IsSourceAllelesMatchedWithG1000())
            {
              loc.G1000Allele2Frequency = double.Parse(frequencies[i], invariant);
              bFound = true;
              break;
            }
          }

          // No alternate allele matched: clear whatever was assigned while trying.
          if (!bFound)
          {
            loc.G1000Allele1 = ' ';
            loc.G1000Allele2 = ' ';
            loc.G1000Allele2Frequency = 0.0;
          }
        }
      }
      progress.SetMessage("Filling MAF finished.");
    }