Пример #1
0
        public void TestReadFromFasta()
        {
            // Reads the FASTA fixture through SequenceUtils.Read and checks the number of sequences returned.
            var format = new FastaFormat();

            // The fixture path is built from the test directory reported by TestContext.
            string fastaPath = TestContext.CurrentContext.TestDirectory + "/../../../data//Standard_Protein_FIT_060222.noredundant.fasta";

            List <Sequence> loaded = SequenceUtils.Read(format, fastaPath);

            Assert.AreEqual(43, loaded.Count);
        }
Пример #2
0
    /// <summary>
    /// Fills <c>RefChar</c> of each SNP item with the upper-cased character found at
    /// the item's position in the matching sequence of a genome fasta file.
    /// </summary>
    /// <param name="snpItems">SNP items to update; they are grouped by <c>Chrom</c> before the file is read.</param>
    /// <param name="fastaFile">Path of the genome fasta file.</param>
    /// <param name="progress">Receives status messages; a <c>ConsoleProgressCallback</c> is created when this is null.</param>
    public static void FillReferenceAlleleFromFasta(this IEnumerable<SNPItem> snpItems, string fastaFile, IProgressCallback progress = null)
    {
      if (progress == null)
      {
        progress = new ConsoleProgressCallback();
      }

      var dic = snpItems.ToGroupDictionary(m => m.Chrom);

      progress.SetMessage("Filling reference allele from {0} file ...", fastaFile);
      using (var sr = new StreamReader(fastaFile))
      {
        var ff = new FastaFormat();
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
          // Pass the name as a format argument instead of concatenating it into the
          // message, so a name containing '{' or '}' cannot be parsed as a placeholder.
          progress.SetMessage("chromosome {0} ...", seq.Name);

          var chr = HumanChromosomeToInt(seq.Name);

          // Single lookup instead of ContainsKey followed by the indexer.
          List<SNPItem> snps;
          if (dic.TryGetValue(chr, out snps))
          {
            foreach (var snp in snps)
            {
              // Position - 1 is used as the string index, i.e. Position is treated as 1-based.
              snp.RefChar = char.ToUpper(seq.SeqString[snp.Position - 1]);
            }
          }
        }
      }
      progress.SetMessage("Filling reference allele finished.");
    }
Пример #3
0
        /// <summary>
        /// Computes the relative frequency of every character that occurs in the
        /// sequences of a fasta file.
        /// </summary>
        /// <param name="fastaFile">Path of the fasta file to read.</param>
        /// <returns>Map from character to its share of all characters read (count divided by the total count).</returns>
        public static Dictionary <char, double> GetDatabaseComposition(string fastaFile)
        {
            var composition = new Dictionary <char, double>();

            using (var reader = new StreamReader(fastaFile))
            {
                var format = new FastaFormat();

                // First pass: raw occurrence counts per character.
                for (Sequence entry = format.ReadSequence(reader); entry != null; entry = format.ReadSequence(reader))
                {
                    foreach (char residue in entry.SeqString)
                    {
                        // TryGetValue leaves 'seen' at 0 for a character not counted yet.
                        double seen;
                        composition.TryGetValue(residue, out seen);
                        composition[residue] = seen + 1;
                    }
                }

                double total = composition.Values.Sum();

                // Second pass: turn counts into fractions. The key snapshot allows
                // assigning to the dictionary while iterating.
                char[] residues = composition.Keys.ToArray();
                foreach (char residue in residues)
                {
                    composition[residue] = composition[residue] / total;
                }
            }

            return composition;
        }
Пример #4
0
 public void TestReadSequence()
 {
   // Reads the fixture record by record: two named sequences are expected,
   // followed by a null result once the reader is exhausted.
   FastaFormat ff = new FastaFormat();

   // The using block disposes the reader even when an assertion throws,
   // so the file handle is not left open.
   using (StreamReader sr = new StreamReader("../../../data/test.fasta"))
   {
     Sequence test1 = ff.ReadSequence(sr);
     Assert.AreEqual("test1", test1.Name);
     Assert.AreEqual("AAAAA", test1.SeqString);

     Sequence test2 = ff.ReadSequence(sr);
     Assert.AreEqual("test2", test2.Name);
     Assert.AreEqual("BBBBB", test2.SeqString);

     Sequence test3 = ff.ReadSequence(sr);
     Assert.IsNull(test3);
   }
 }
Пример #5
0
        public void TestReadSequence()
        {
            // Reads the fixture record by record: two named sequences are expected,
            // followed by a null result once the reader is exhausted.
            FastaFormat ff = new FastaFormat();

            // The using block disposes the reader even when an assertion throws,
            // so the file handle is not left open.
            using (StreamReader sr = new StreamReader(TestContext.CurrentContext.TestDirectory + "/../../../data//test.fasta"))
            {
                Sequence test1 = ff.ReadSequence(sr);

                Assert.AreEqual("test1", test1.Name);
                Assert.AreEqual("AAAAA", test1.SeqString);

                Sequence test2 = ff.ReadSequence(sr);

                Assert.AreEqual("test2", test2.Name);
                Assert.AreEqual("BBBBB", test2.SeqString);

                Sequence test3 = ff.ReadSequence(sr);

                Assert.IsNull(test3);
            }
        }
Пример #6
0
    /// <summary>
    /// Builds a lookup from parsed access number to sequence for every entry read
    /// from a fasta database file.
    /// </summary>
    /// <param name="database">Path of the fasta file to read.</param>
    /// <param name="acParser">Produces the dictionary key from each sequence name.</param>
    /// <returns>Map keyed by the parsed value; when two entries yield the same key, the later entry replaces the earlier one.</returns>
    public static Dictionary<string, Sequence> GetAccessNumberMap(string database, IStringParser<string> acParser)
    {
      var map = new Dictionary<string, Sequence>();

      using (var reader = new StreamReader(database))
      {
        var format = new FastaFormat();

        while (true)
        {
          Sequence entry = format.ReadSequence(reader);
          if (entry == null)
          {
            // ReadSequence returning null marks the end of the file.
            break;
          }

          map[acParser.GetValue(entry.Name)] = entry;
        }
      }

      return map;
    }
Пример #7
0
        /// <summary>
        /// Reads every sequence of a fasta database and indexes it by the value the
        /// parser extracts from the sequence name.
        /// </summary>
        /// <param name="database">Path of the fasta file to read.</param>
        /// <param name="acParser">Produces the dictionary key from each sequence name.</param>
        /// <returns>Map from parsed key to sequence; a repeated key keeps the last sequence read.</returns>
        public static Dictionary <string, Sequence> GetAccessNumberMap(string database, IStringParser <string> acParser)
        {
            var byAccessNumber = new Dictionary <string, Sequence>();

            using (var input = new StreamReader(database))
            {
                var fasta = new FastaFormat();

                // The loop ends when ReadSequence returns null.
                for (Sequence current = fasta.ReadSequence(input); current != null; current = fasta.ReadSequence(input))
                {
                    string key = acParser.GetValue(current.Name);
                    byAccessNumber[key] = current;
                }
            }

            return byAccessNumber;
        }
Пример #8
0
        /// <summary>
        /// Reads every sequence of a fasta file and writes, for each one, the sequence
        /// returned by <c>SequenceUtils.GetReversedSequence</c> to the output writer.
        /// The original sequence is written first when <c>combined</c> is set, and
        /// is passed to <c>builder.Build</c> when <c>pseudoAminoacid</c> is set.
        /// </summary>
        /// <param name="index">Running counter; incremented once per sequence and passed to <c>GetReversedSequence</c>.</param>
        /// <param name="sw">Writer receiving the output sequences.</param>
        /// <param name="fastaFile">Path of the fasta file to read.</param>
        /// <param name="isContaminant">When true, references not yet starting with "CON_" get that prefix.</param>
        private void ProcessFile(ref int index, StreamWriter sw, string fastaFile, bool isContaminant)
        {
            FastaFormat ff = new FastaFormat();

            using (StreamReader sr = new StreamReader(fastaFile))
            {
                // Progress is reported against the length of the underlying stream.
                Progress.SetRange(0, sr.BaseStream.Length);

                Sequence seq;
                while ((seq = ff.ReadSequence(sr)) != null)
                {
                    Progress.SetPosition(StreamUtils.GetCharpos(sr));

                    // The prefix is a fixed marker, so compare ordinally rather
                    // than with the current culture's linguistic rules.
                    if (isContaminant && !seq.Reference.StartsWith("CON_", System.StringComparison.Ordinal))
                    {
                        seq.Reference = "CON_" + seq.Reference;
                    }

                    if (combined)
                    {
                        ff.WriteSequence(sw, seq);
                    }

                    if (pseudoAminoacid)
                    {
                        builder.Build(seq);
                    }

                    // The reversed sequence is derived after builder.Build has seen the sequence.
                    index++;
                    Sequence reversedSeq = SequenceUtils.GetReversedSequence(seq.SeqString, index);

                    ff.WriteSequence(sw, reversedSeq);
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Chooses an access number parser by inspecting the references of the
        /// sequences in a fasta database file.
        /// </summary>
        /// <param name="databaseFileName">Path of the fasta file to inspect.</param>
        /// <returns>
        /// A <c>NoExceptionAccessNumberParser</c> wrapping an <c>IPIAccessNumberParser</c> or an
        /// <c>NRAccessNumberParser</c> for the first reference starting with "IPI" or "gi|"
        /// respectively; otherwise <c>DefaultAccessNumberParser.GetInstance()</c>.
        /// </returns>
        public static IAccessNumberParser GuessParser(string databaseFileName)
        {
            FastaFormat ff = new FastaFormat();

            using (StreamReader sr = new StreamReader(databaseFileName))
            {
                Sequence seq;
                while ((seq = ff.ReadSequence(sr)) != null)
                {
                    // Database prefixes are fixed identifiers, so compare ordinally
                    // rather than with the current culture's linguistic rules.
                    if (seq.Reference.StartsWith("IPI", System.StringComparison.Ordinal))
                    {
                        return(new NoExceptionAccessNumberParser(new IPIAccessNumberParser()));
                    }

                    if (seq.Reference.StartsWith("gi|", System.StringComparison.Ordinal))
                    {
                        return(new NoExceptionAccessNumberParser(new NRAccessNumberParser()));
                    }
                }
            }

            // No sequence carried a recognized prefix.
            return(DefaultAccessNumberParser.GetInstance());
        }
Пример #10
0
    /// <summary>
    /// Chooses an access number parser by inspecting the references of the
    /// sequences in a fasta database file.
    /// </summary>
    /// <param name="databaseFileName">Path of the fasta file to inspect.</param>
    /// <returns>
    /// A <c>NoExceptionAccessNumberParser</c> wrapping an <c>IPIAccessNumberParser</c> or an
    /// <c>NRAccessNumberParser</c> for the first reference starting with "IPI" or "gi|"
    /// respectively; otherwise <c>DefaultAccessNumberParser.GetInstance()</c>.
    /// </returns>
    public static IAccessNumberParser GuessParser(string databaseFileName)
    {
      FastaFormat ff = new FastaFormat();

      using (StreamReader sr = new StreamReader(databaseFileName))
      {
        Sequence seq;
        while ((seq = ff.ReadSequence(sr)) != null)
        {
          // Database prefixes are fixed identifiers, so compare ordinally
          // rather than with the current culture's linguistic rules.
          if (seq.Reference.StartsWith("IPI", System.StringComparison.Ordinal))
          {
            return new NoExceptionAccessNumberParser(new IPIAccessNumberParser());
          }

          if (seq.Reference.StartsWith("gi|", System.StringComparison.Ordinal))
          {
            return new NoExceptionAccessNumberParser(new NRAccessNumberParser());
          }
        }
      }

      // No sequence carried a recognized prefix.
      return DefaultAccessNumberParser.GetInstance();
    }
Пример #11
0
    /// <summary>
    /// Loads target coverage regions, attaches the matching genome sequence to each
    /// region, and assigns a gene symbol looked up from the refgene file.
    /// </summary>
    /// <param name="options">Supplies <c>TargetFile</c>, <c>GenomeFastaFile</c> and <c>RefgeneFile</c>.</param>
    /// <param name="progress">Receives status messages.</param>
    /// <param name="removeRegionWithoutSequence">When true, regions whose sequence is null or empty after the fill are removed.</param>
    /// <returns>The loaded regions with sequence and gene symbol filled in.</returns>
    public static List<CoverageRegion> GetTargetCoverageRegion(ITargetBuilderOptions options, IProgressCallback progress, bool removeRegionWithoutSequence = true)
    {
      // A target file ending in ".xml" goes through the XML loader; anything else through the BED loader.
      List<CoverageRegion> regions;
      if (!options.TargetFile.EndsWith(".xml"))
      {
        regions = GetTargetCoverageRegionFromBed(options, progress);
      }
      else
      {
        regions = GetTargetCoverageRegionFromXml(options, progress);
      }

      var regionsBySeqname = regions.ToGroupDictionary(m => m.Seqname);

      progress.SetMessage("Filling sequence from {0}...", options.GenomeFastaFile);
      using (var reader = new StreamReader(options.GenomeFastaFile))
      {
        var format = new FastaFormat();
        for (Sequence chromosome = format.ReadSequence(reader); chromosome != null; chromosome = format.ReadSequence(reader))
        {
          progress.SetMessage("Processing chromosome {0} ...", chromosome.Reference);

          // Regions are keyed by the part of the sequence name after "chr".
          var seqname = chromosome.Name.StringAfter("chr");
          List<CoverageRegion> hits;
          if (!regionsBySeqname.TryGetValue(seqname, out hits))
          {
            continue;
          }

          foreach (var region in hits)
          {
            // Start - 1 is the substring offset, i.e. Start is treated as 1-based.
            region.Sequence = chromosome.SeqString.Substring((int)(region.Start - 1), (int)region.Length);

            // NOTE(review): the reverse complement is stored only for '+' strand regions - confirm this is intended.
            if (region.Strand == '+')
            {
              region.ReverseComplementedSequence = SequenceUtils.GetReverseComplementedSequence(region.Sequence);
            }
          }
        }
      }

      if (removeRegionWithoutSequence)
      {
        regions.RemoveAll(candidate => string.IsNullOrEmpty(candidate.Sequence));
      }

      progress.SetMessage("Filling sequence finished.");

      // The gene key is the part of the region name before "_utr3"; unknown genes get an empty symbol.
      var symbolByGene = new MapReader(1, 12).ReadFromFile(options.RefgeneFile);
      foreach (var region in regions)
      {
        var gene = region.Name.StringBefore("_utr3");
        if (symbolByGene.ContainsKey(gene))
        {
          region.GeneSymbol = symbolByGene[gene];
        }
        else
        {
          region.GeneSymbol = string.Empty;
        }
      }

      return regions;
    }
Пример #12
0
 public void TestReadFromFasta()
 {
   // Reads the FASTA fixture through SequenceUtils.Read and checks the number of sequences returned.
   var format = new FastaFormat();
   string fastaPath = @"../../../data/Standard_Protein_FIT_060222.noredundant.fasta";

   List<Sequence> loaded = SequenceUtils.Read(format, fastaPath);

   Assert.AreEqual(43, loaded.Count);
 }
Пример #13
0
    /// <summary>
    /// Computes, for every character occurring in the sequences of a fasta file,
    /// its fraction of all characters read.
    /// </summary>
    /// <param name="fastaFile">Path of the fasta file to read.</param>
    /// <returns>Map from character to its count divided by the total character count.</returns>
    public static Dictionary<char, double> GetDatabaseComposition(string fastaFile)
    {
      var frequencies = new Dictionary<char, double>();
      using (var input = new StreamReader(fastaFile))
      {
        var fasta = new FastaFormat();

        // Tally occurrences until ReadSequence signals the end of the file with null.
        while (true)
        {
          Sequence current = fasta.ReadSequence(input);
          if (current == null)
          {
            break;
          }

          foreach (char symbol in current.SeqString)
          {
            double count;
            frequencies[symbol] = frequencies.TryGetValue(symbol, out count) ? count + 1 : 1;
          }
        }

        double total = frequencies.Values.Sum();

        // Iterate over a copy of the keys so the entries can be overwritten in the loop.
        foreach (char symbol in frequencies.Keys.ToArray())
        {
          frequencies[symbol] = frequencies[symbol] / total;
        }
      }
      return frequencies;
    }