/// <summary>
/// Tests whether any protein entry of the spectrum resolves to an access number
/// that is contained in <c>conMap</c>.
/// </summary>
/// <param name="t">Spectrum whose <c>Proteins</c> entries are examined.</param>
/// <returns>
/// <c>true</c> as soon as one entry's parsed access number is found in <c>conMap</c>;
/// <c>false</c> when no entry matches or the spectrum has no protein entries.
/// </returns>
public bool Accept(IIdentifiedSpectrum t)
        {
            foreach (var p in t.Proteins)
            {
                var ac = acParser.GetValue(p);

                // Keep scanning when this entry is not in the map: returning the result of
                // the first lookup unconditionally would ignore every entry after the first.
                if (conMap.Contains(ac))
                {
                    return true;
                }
            }

            return false;
        }
Beispiel #2
0
        /// <summary>
        /// Tests whether any member of the protein group has a name whose parsed access
        /// number is contained in <c>conMap</c>.
        /// </summary>
        /// <param name="t">Protein group whose members are examined in enumeration order.</param>
        /// <returns><c>true</c> if at least one member matches; otherwise <c>false</c>.</returns>
        public bool Accept(IIdentifiedProteinGroup t)
        {
            bool matched = false;

            foreach (var member in t)
            {
                var accession = acParser.GetValue(member.Name);
                matched = conMap.Contains(accession);

                // Stop at the first hit; later members cannot change the answer.
                if (matched)
                {
                    break;
                }
            }

            return matched;
        }
 /// <summary>
 /// Replaces each original protein name with the protein's access number, to avoid errors
 /// caused by SEQUEST search results reporting the same protein under names of different lengths.
 /// </summary>
 /// <typeparam name="T">Identified spectrum type.</typeparam>
 /// <param name="spectra">List of identified spectra whose peptides are updated in place.</param>
 /// <param name="proteinAccessNumberParser">Parser that extracts the access number from a protein name.</param>
 public static void RefineIdentifiedProteinName <T>(List <T> spectra, IStringParser <string> proteinAccessNumberParser) where T : IIdentifiedSpectrum
 {
     foreach (T item in spectra)
     {
         foreach (IIdentifiedPeptide pep in item.Peptides)
         {
             // The name collection is fetched once per peptide and then rewritten slot by slot.
             ReadOnlyCollection <string> names = pep.Proteins;

             int index = 0;
             while (index < names.Count)
             {
                 string accessNumber = proteinAccessNumberParser.GetValue(names[index]);
                 pep.SetProtein(index, accessNumber);
                 index++;
             }
         }
     }
 }
Beispiel #4
0
    /// <summary>
    /// Reads every sequence from a FASTA database file and indexes it by the access
    /// number parsed from the sequence name.
    /// </summary>
    /// <param name="database">Path of the file to read.</param>
    /// <param name="acParser">Parser applied to each sequence name to obtain the dictionary key.</param>
    /// <returns>
    /// Map from access number to sequence. When two sequences yield the same access
    /// number, the one read later replaces the earlier one.
    /// </returns>
    public static Dictionary<string, Sequence> GetAccessNumberMap(string database, IStringParser<string> acParser)
    {
      var map = new Dictionary<string, Sequence>();

      using (var reader = new StreamReader(database))
      {
        var fasta = new FastaFormat();

        // ReadSequence signals the end of input by returning null.
        for (Sequence entry = fasta.ReadSequence(reader); entry != null; entry = fasta.ReadSequence(reader))
        {
          map[acParser.GetValue(entry.Name)] = entry;
        }
      }

      return map;
    }
Beispiel #5
0
        /// <summary>
        /// Builds a lookup from access number to sequence for all sequences in a FASTA file.
        /// </summary>
        /// <param name="database">Path of the file to read.</param>
        /// <param name="acParser">Parser that turns a sequence name into its access number.</param>
        /// <returns>
        /// Dictionary keyed by access number; a later sequence with the same key
        /// overwrites the earlier entry.
        /// </returns>
        public static Dictionary <string, Sequence> GetAccessNumberMap(string database, IStringParser <string> acParser)
        {
            var lookup = new Dictionary <string, Sequence>();

            using (var input = new StreamReader(database))
            {
                var format = new FastaFormat();

                while (true)
                {
                    Sequence current = format.ReadSequence(input);
                    if (current == null)
                    {
                        // A null sequence marks the end of the file.
                        break;
                    }

                    string key = acParser.GetValue(current.Name);
                    lookup[key] = current;
                }
            }

            return lookup;
        }
Beispiel #6
0
        /// <summary>
        /// Scans a FASTA file and collects the access numbers of all sequences whose
        /// reference text matches the given pattern (case-insensitive).
        /// </summary>
        /// <param name="acParser">Parser that extracts the access number from a sequence name.</param>
        /// <param name="fastaFilename">Path of the FASTA file to scan.</param>
        /// <param name="contaminationDescriptionPattern">Regular expression tested against each sequence's <c>Reference</c>.</param>
        /// <param name="progress">Progress sink; when <c>null</c> an <c>EmptyProgressCallback</c> is used instead.</param>
        /// <returns>Set of access numbers of the matching sequences.</returns>
        /// <exception cref="UserTerminatedException">Thrown when the progress callback reports a pending cancellation.</exception>
        public static HashSet <string> GetContaminationAccessNumbers(IStringParser <string> acParser, string fastaFilename, string contaminationDescriptionPattern,
                                                                     IProgressCallback progress)
        {
            var contaminants = new HashSet <string>();

            IProgressCallback reporter = progress ?? new EmptyProgressCallback();

            var pattern = new Regex(contaminationDescriptionPattern, RegexOptions.IgnoreCase);

            reporter.SetMessage("Get contamination map from database ...");
            var fasta = new FastaFormat();

            using (var reader = new StreamReader(fastaFilename))
            {
                reporter.SetRange(1, reader.BaseStream.Length);

                for (Sequence entry = fasta.ReadSequence(reader); entry != null; entry = fasta.ReadSequence(reader))
                {
                    // Cancellation is checked once per sequence, before any work on it.
                    if (reporter.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    reporter.SetPosition(reader.GetCharpos());

                    // The access number is parsed for every sequence, matching or not.
                    string accession = acParser.GetValue(entry.Name);

                    if (pattern.IsMatch(entry.Reference))
                    {
                        contaminants.Add(accession);
                    }
                }
            }

            reporter.SetMessage("Get contamination map from database finished.");

            return contaminants;
        }
        /// <summary>
        /// Fills sequence, reference and name of the proteins in <paramref name="result"/> from the
        /// protein sequences parsed out of each file in <c>msfFiles</c>, then verifies that every
        /// protein whose name does not start with <c>MsfDatabaseParser.DECOY_PREFIX</c> has a sequence.
        /// </summary>
        /// <param name="result">Identification result whose protein groups are updated in place.</param>
        /// <returns>Always <c>true</c> when the method completes without throwing.</returns>
        /// <exception cref="Exception">
        /// Thrown when a protein without the decoy prefix still has no sequence after all files were processed.
        /// </exception>
        protected override bool FillSequence(IIdentifiedResult result)
        {
            foreach (var file in msfFiles)
            {
                // Index the sequences of this file by their parsed access number.
                var parsed      = new MsfDatabaseToNoredundantProcessor().ParseProteinSequences(file);
                var byAccession = parsed.ToDictionary(m => acParser.GetValue(m.Name));

                foreach (var proteinGroup in result)
                {
                    foreach (var entry in proteinGroup)
                    {
                        var      key = acParser.GetValue(entry.Name);
                        Sequence match;
                        if (!byAccession.TryGetValue(key, out match))
                        {
                            continue;
                        }

                        entry.Sequence  = match.SeqString;
                        entry.Reference = match.Reference;
                        entry.Name      = key;
                    }
                }
            }

            foreach (var proteinGroup in result)
            {
                foreach (IIdentifiedProtein candidate in proteinGroup)
                {
                    // Proteins carrying the decoy prefix are exempt from the sequence check.
                    bool isDecoy = candidate.Name.StartsWith(MsfDatabaseParser.DECOY_PREFIX);

                    if (!isDecoy && candidate.Sequence == null)
                    {
                        throw new Exception(
                                  MyConvert.Format(
                                      "Couldn't find sequence of protein {0}, change access number pattern or select another database.",
                                      candidate.Name));
                    }
                }
            }

            return true;
        }
Beispiel #8
0
        /// <summary>
        /// Fills name, description and sequence of the proteins in <paramref name="t"/> from a FASTA file.
        /// Each protein is registered under its parsed access number and, when different, also under
        /// its full name; FASTA entries are then matched by the access number parsed from their name.
        /// </summary>
        /// <param name="acParser">Parser that extracts the access number from a protein or sequence name.</param>
        /// <param name="fastaFilename">Path of the FASTA file to read.</param>
        /// <param name="t">Identification result whose proteins are updated in place.</param>
        /// <param name="progress">Progress sink; when <c>null</c> an <c>EmptyProgressCallback</c> is used instead.</param>
        /// <exception cref="Exception">
        /// Thrown when two proteins map to the same key, or when proteins remain without a sequence
        /// and not all of them have a name starting with "XXX_".
        /// </exception>
        /// <exception cref="UserTerminatedException">Thrown when the progress callback reports a pending cancellation.</exception>
        public static void FillSequenceFromFasta(IStringParser <string> acParser, string fastaFilename, IIdentifiedResult t,
                                                 IProgressCallback progress)
        {
            if (progress == null)
            {
                progress = new EmptyProgressCallback();
            }

            progress.SetMessage("Initializing accessNumber/protein map ...");

            var acMap = new Dictionary <string, IIdentifiedProtein>();

            foreach (IIdentifiedProteinGroup group in t)
            {
                foreach (IIdentifiedProtein protein in group)
                {
                    string ac = acParser.GetValue(protein.Name);
                    if (acMap.ContainsKey(ac))
                    {
                        throw new Exception("Duplicate access number " + ac);
                    }
                    acMap[ac] = protein;

                    // Also register the unparsed name so a FASTA entry can match on either key.
                    if (ac != protein.Name)
                    {
                        if (acMap.ContainsKey(protein.Name))
                        {
                            throw new Exception("Duplicate access number " + protein.Name);
                        }
                        acMap[protein.Name] = protein;
                    }
                }
            }

            progress.SetMessage("Filling sequence from database ...");
            var ff = new FastaFormat();

            using (var sr = new StreamReader(fastaFilename))
            {
                progress.SetRange(1, sr.BaseStream.Length);

                Sequence seq;
                while ((seq = ff.ReadSequence(sr)) != null)
                {
                    if (progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    progress.SetPosition(sr.BaseStream.Position);

                    string             ac = acParser.GetValue(seq.Name);
                    IIdentifiedProtein protein;

                    // Single lookup instead of ContainsKey followed by the indexer.
                    if (acMap.TryGetValue(ac, out protein))
                    {
                        protein.Name        = seq.Name.Replace("/", " ");
                        protein.Description = seq.Description.Replace("\t", " ").Replace("/", " ");
                        protein.Sequence    = seq.SeqString;
                    }
                }
            }

            // A protein may be stored under two keys (access number and full name), so it can
            // appear twice in acMap.Values; de-duplicate the names before reporting them.
            var proteinNames = acMap.Values
                               .Where(l => l.Sequence == null)
                               .Select(l => l.Name)
                               .Distinct()
                               .ToArray();

            if (proteinNames.Length > 0)
            {
                // Missing sequences are tolerated only when every affected name carries the
                // "XXX_" prefix (presumably decoy entries; confirm against the database convention).
                if (!proteinNames.All(l => l.StartsWith("XXX_", StringComparison.Ordinal)))
                {
                    throw new Exception(string.Format("Couldn't find sequence of following protein(s), change access number pattern or select another database\n{0}", proteinNames.Merge("/")));
                }
            }

            progress.SetMessage("Fill sequence from database finished.");
        }