/// <summary>
/// Replaces the protein references of the peptides of <paramref name="t"/> with the
/// names encoded in <paramref name="value"/>.
/// </summary>
/// <param name="t">Spectrum whose peptides are updated.</param>
/// <param name="value">
/// Text that <c>reg</c> splits into one group per peptide; each group is split again
/// with <c>chars</c> into individual protein names.
/// </param>
public override void SetProperty(T t, string value)
        {
            string[] peptideGroups = reg.Split(value);

            if (t.Peptides.Count == peptideGroups.Length)
            {
                // Same number of peptides: keep the peptide objects and only swap their protein lists.
                int slot = 0;
                foreach (string peptideGroup in peptideGroups)
                {
                    string[] names = peptideGroup.Split(chars);
                    var existing = t.Peptides[slot];
                    existing.ClearProteins();
                    foreach (string name in names)
                    {
                        existing.AddProtein(name);
                    }

                    slot++;
                }

                return;
            }

            // Peptide count differs: drop everything and create one fresh peptide per group.
            t.ClearPeptides();

            foreach (string peptideGroup in peptideGroups)
            {
                // No explicit add is performed here; the constructor is relied on to attach
                // the new peptide to t (the tests in this code base use it the same way).
                IIdentifiedPeptide created = new IdentifiedPeptide(t);

                foreach (string name in peptideGroup.Split(chars))
                {
                    created.AddProtein(name);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds an <see cref="IdentifiedPeptide"/> for <paramref name="sph"/> from one
        /// <c>search_hit</c> element: sequence (with modification characters), flanking
        /// residues, missed-cleavage / termini counts, protein names and scores.
        /// </summary>
        /// <param name="sph">Spectrum that receives the peptide and the hit-level values.</param>
        /// <param name="searchHit">The search hit XML element to read.</param>
        /// <param name="ppmods">Modification definitions passed on to <c>FindModificationChar</c>.</param>
        private void ParseSearchHit(IIdentifiedSpectrum sph, XElement searchHit, PepXmlModifications ppmods)
        {
            var peptide = new IdentifiedPeptide(sph);

            var modInfo = searchHit.FindFirstDescendant("modification_info");

            string sequence = searchHit.Attribute("peptide").Value;

            if (modInfo != null)
            {
                var modifiedPeptide = modInfo.Attribute("modified_peptide");
                if (modifiedPeptide == null || modReg.Match(modifiedPeptide.Value).Success)
                {
                    // No usable modified_peptide text: rebuild the modified sequence from the
                    // per-residue modification entries instead.
                    string unmodified = sequence;
                    var massMods = PeptideProphetUtils.ParseModificationAminoacidMass(modInfo);
                    if (massMods != null && massMods.Count > 0)
                    {
                        // Entries are walked in reversed order before inserting.
                        // NOTE(review): presumably so that an insertion does not shift the
                        // positions still to be processed - this relies on the parsed list
                        // being in ascending position order; confirm.
                        massMods.Reverse();
                        foreach (var massMod in massMods)
                        {
                            string symbol = FindModificationChar(ppmods, massMod, unmodified);
                            sequence = sequence.Insert(massMod.Position, symbol);
                        }
                    }
                }
                else
                {
                    sequence = modifiedPeptide.Value;
                }
            }

            // With a previous-residue attribute the sequence is written as "prev.SEQ.next".
            var previousResidue = searchHit.Attribute("peptide_prev_aa");
            peptide.Sequence = previousResidue != null
                ? previousResidue.Value + "." + sequence + "." + searchHit.Attribute("peptide_next_aa").Value
                : sequence;

            sph.NumMissedCleavages = GetAttributeValue(searchHit, "num_missed_cleavages", 0);
            sph.NumProteaseTermini = GetAttributeValue(searchHit, "num_tol_term", 2);

            peptide.AddProtein(searchHit.Attribute("protein").Value);

            int totalProteins = int.Parse(searchHit.Attribute("num_tot_proteins").Value);
            if (totalProteins > 1)
            {
                foreach (var alternative in searchHit.FindDescendants("alternative_protein"))
                {
                    peptide.AddProtein(alternative.Attribute("protein").Value);
                }
            }

            ParseScoreAndOtherInformation(sph, searchHit);
        }
        /// <summary>
        /// Writes the modification characters of the non-static modifications into
        /// <c>mp.Sequence</c>.
        /// </summary>
        /// <param name="mp">Peptide whose sequence string is rewritten.</param>
        /// <param name="modifications">Modification name keyed by position.</param>
        /// <param name="mm">
        /// Modification definitions; entries whose name occurs among the values of
        /// <c>mm.StaticModification.ModificationMap</c> are skipped.
        /// </param>
        /// <exception cref="Exception">
        /// A remaining modification name has no entry in <c>ModificationCharMap</c>.
        /// </exception>
        protected void ModifySequence(IdentifiedPeptide mp, Dictionary <int, string> modifications, PFindModification mm)
        {
            // Highest position first, so an insertion never moves a position that is still pending.
            var dynamicPositions = modifications
                                   .Where(entry => !mm.StaticModification.ModificationMap.ContainsValue(entry.Value))
                                   .OrderByDescending(entry => entry.Key)
                                   .Select(entry => entry.Key)
                                   .ToList();

            foreach (var position in dynamicPositions)
            {
                string name = modifications[position];
                if (!this.ModificationCharMap.ContainsKey(name))
                {
                    throw new Exception(MyConvert.Format("Cannot find dynamic modification {0} definition", name));
                }

                string symbol = this.ModificationCharMap[name].ToString();

                // A position equal to the current length is appended; any other position is
                // inserted at string index position + 1.
                mp.Sequence = position == mp.Sequence.Length
                    ? mp.Sequence + symbol
                    : mp.Sequence.Insert(position + 1, symbol);
            }
        }
Пример #4
0
        /// <summary>
        /// Loads an XML result file and converts every <c>psm</c> element below <c>psms</c>
        /// into an identified spectrum with a single peptide.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>One spectrum per <c>psm</c> element, in document order.</returns>
        /// <remarks>
        /// Numeric values are parsed with the invariant culture: the file is machine-written
        /// with '.' as decimal separator, so parsing must not depend on the locale of the
        /// machine that reads it.
        /// </remarks>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result = new List <IIdentifiedSpectrum>();
            XElement root   = XElement.Load(fileName);
            var      psms   = root.FindElement("psms").FindElements("psm");

            foreach (var psm in psms)
            {
                IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                spec.Id            = psm.FindAttribute("psm_id").Value.StringAfter("decoy_");
                spec.FromDecoy     = psm.FindAttribute("decoy").Value.Equals("true");
                spec.SpScore       = double.Parse(psm.FindElement("svm_score").Value, invariant);
                spec.QValue        = double.Parse(psm.FindElement("q_value").Value, invariant);
                // The "pep" element is stored as the main score of the spectrum.
                spec.Score         = double.Parse(psm.FindElement("pep").Value, invariant);
                spec.Probability   = double.Parse(psm.FindElement("p_value").Value, invariant);
                spec.TheoreticalMH = double.Parse(psm.FindElement("calc_mass").Value, invariant);
                // The experiment name is the file name up to its first '.'.
                spec.Query.FileScan.Experimental = Path.GetFileName(fileName).StringBefore(".");

                // Constructing the peptide with the spectrum links the two; no explicit add is done here.
                var pep    = new IdentifiedPeptide(spec);
                var pepseq = psm.FindElement("peptide_seq");
                pep.Sequence = pepseq.FindAttribute("seq").Value;
                pep.AddProtein(psm.FindElement("protein_id").Value);
                result.Add(spec);
            }

            return(result);
        }
        /// <summary>
        /// Assigns the site probabilities encoded in <paramref name="value"/> to the peptides
        /// of <paramref name="t"/>, one entry (as split by <c>reg</c>) per peptide.
        /// </summary>
        /// <param name="t">Spectrum whose peptides are updated.</param>
        /// <param name="value">Delimited site-probability text; null or empty leaves <paramref name="t"/> untouched.</param>
        public override void SetProperty(T t, string value)
        {
            if (!string.IsNullOrEmpty(value))
            {
                string[] probabilities = reg.Split(value);

                // Existing peptides are reused only when their number matches the entries;
                // otherwise they are discarded and recreated.
                bool reuseExisting = t.Peptides.Count == probabilities.Length;
                if (!reuseExisting)
                {
                    t.ClearPeptides();
                }

                for (int index = 0; index < probabilities.Length; index++)
                {
                    if (reuseExisting)
                    {
                        t.Peptides[index].SiteProbability = probabilities[index];
                    }
                    else
                    {
                        IIdentifiedPeptide created = new IdentifiedPeptide(t);
                        created.SiteProbability = probabilities[index];
                    }
                }
            }
        }
        /// <summary>
        /// Creates the fixture data: two reference sequences and six peptides, each attached
        /// to its own fresh spectrum.
        /// </summary>
        public void SetUp()
        {
            seqs = new[]
            {
                new Sequence("Test1", "ADFADJLFASRDLFKJWONNCKAOIWJEFLNC"),
                new Sequence("Test2", "WEUOIRJKJNCJKLSDTORWELSDJF")
            }.ToList();

            // "JWONNCK" occurs verbatim in Test1.
            var identicalPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            identicalPeptide.Sequence = "JWONNCK";
            identical = identicalPeptide;

            // "LSDAOR" differs from "LSDTOR" in Test2 by one residue.
            var singleMutationPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            singleMutationPeptide.Sequence = "LSDAOR";
            singlemutation = singleMutationPeptide;

            var missCleavageIdenticalPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            missCleavageIdenticalPeptide.Sequence = "LFKJWONNCK";
            misscleavageIdentical = missCleavageIdenticalPeptide;

            var missCleavageMutationPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            missCleavageMutationPeptide.Sequence = "JKLSDAOR";
            misscleavageSingleMutation = missCleavageMutationPeptide;

            var nptIdenticalPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            nptIdenticalPeptide.Sequence = "JWONNC";
            nptIdentical = nptIdenticalPeptide;

            var nptMutationPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());
            nptMutationPeptide.Sequence = "LSDAO";
            nptSingleMutation = nptMutationPeptide;

            // NOTE(review): the validator is not created by this set-up; the line below was already disabled.
            //validator = new SnpDataValidator(null, null, null, new Protease("Trypsin", true, "KR", "P"));
        }
Пример #7
0
    /// <summary>
    /// Builds a small fixture: two peptides (from experiments EXP1 and EXP2) shared between
    /// three proteins.
    /// </summary>
    /// <returns>The proteins in the order Protein3, Protein2, Protein1.</returns>
    private static List<IIdentifiedProtein> InitProteins()
    {
      // Peptide SEQ1 from EXP1, referencing Protein1 and Protein2.
      var firstSpectrum = new IdentifiedSpectrum();
      firstSpectrum.Query.FileScan.Experimental = "EXP1";
      var firstPeptide = new IdentifiedPeptide(firstSpectrum);
      firstPeptide.AddProtein("Protein1");
      firstPeptide.AddProtein("Protein2");
      firstPeptide.Sequence = "SEQ1";

      // Peptide SEQ2 from EXP2, referencing Protein1 and Protein3.
      var secondSpectrum = new IdentifiedSpectrum();
      secondSpectrum.Query.FileScan.Experimental = "EXP2";
      var secondPeptide = new IdentifiedPeptide(secondSpectrum);
      secondPeptide.AddProtein("Protein1");
      secondPeptide.AddProtein("Protein3");
      secondPeptide.Sequence = "SEQ2";

      var protein1 = new IdentifiedProtein("Protein1");
      protein1.Peptides.Add(firstPeptide);
      protein1.Peptides.Add(secondPeptide);

      var protein2 = new IdentifiedProtein("Protein2");
      protein2.Peptides.Add(firstPeptide);

      var protein3 = new IdentifiedProtein("Protein3");
      protein3.Peptides.Add(secondPeptide);

      // Returned in reverse name order.
      return new List<IIdentifiedProtein> { protein3, protein2, protein1 };
    }
        /// <summary>
        /// Sorting a list of proteins added as Protein3, Protein2, Protein1 must yield
        /// Protein1, Protein2, Protein3.
        /// </summary>
        public void TestSort()
        {
            var firstPeptide  = new IdentifiedPeptide(new IdentifiedSpectrum());
            var secondPeptide = new IdentifiedPeptide(new IdentifiedSpectrum());

            // Protein1 holds both peptides, Protein2 and Protein3 one each.
            var protein1 = new IdentifiedProtein("Protein1");
            protein1.Peptides.Add(firstPeptide);
            protein1.Peptides.Add(secondPeptide);

            var protein2 = new IdentifiedProtein("Protein2");
            protein2.Peptides.Add(firstPeptide);

            var protein3 = new IdentifiedProtein("Protein3");
            protein3.Peptides.Add(secondPeptide);

            var sorted = new List <IdentifiedProtein> { protein3, protein2, protein1 };
            sorted.Sort();

            var expectedOrder = new[] { protein1, protein2, protein3 };
            for (int position = 0; position < expectedOrder.Length; position++)
            {
                Assert.AreEqual(expectedOrder[position], sorted[position]);
            }
        }
Пример #9
0
    /// <summary>
    /// Verifies the default sort order of <see cref="IdentifiedProtein"/>: a list filled in
    /// the order Protein3, Protein2, Protein1 comes out as Protein1, Protein2, Protein3.
    /// </summary>
    public void TestSort()
    {
      var spectrumA = new IdentifiedSpectrum();
      var spectrumB = new IdentifiedSpectrum();
      var peptideA = new IdentifiedPeptide(spectrumA);
      var peptideB = new IdentifiedPeptide(spectrumB);

      var twoPeptideProtein = new IdentifiedProtein("Protein1");
      var onlyPeptideA = new IdentifiedProtein("Protein2");
      var onlyPeptideB = new IdentifiedProtein("Protein3");

      twoPeptideProtein.Peptides.Add(peptideA);
      twoPeptideProtein.Peptides.Add(peptideB);
      onlyPeptideA.Peptides.Add(peptideA);
      onlyPeptideB.Peptides.Add(peptideB);

      // Deliberately inserted in reverse order.
      var proteins = new List<IdentifiedProtein>();
      proteins.AddRange(new[] { onlyPeptideB, onlyPeptideA, twoPeptideProtein });

      proteins.Sort();

      Assert.AreEqual(twoPeptideProtein, proteins[0]);
      Assert.AreEqual(onlyPeptideA, proteins[1]);
      Assert.AreEqual(onlyPeptideB, proteins[2]);
    }
        /// <summary>
        /// Round-trips protein references through <c>IdentifiedSpectrumReferenceConverter</c>:
        /// checks the converter name, the text produced for two peptides, and the proteins
        /// present after setting a new text.
        /// </summary>
        public void Test()
        {
            IPropertyConverter <IdentifiedSpectrum> converter = new IdentifiedSpectrumReferenceConverter <IdentifiedSpectrum>();
            var spectrum = new IdentifiedSpectrum();

            // First peptide references one protein, the second one two.
            var singleProteinPeptide = new IdentifiedPeptide(spectrum);
            singleProteinPeptide.AddProtein("11111");

            var twoProteinPeptide = new IdentifiedPeptide(spectrum);
            twoProteinPeptide.AddProtein("22222");
            twoProteinPeptide.AddProtein("33333");

            Assert.AreEqual("Reference", converter.Name);
            Assert.AreEqual("11111 ! 22222/33333", converter.GetProperty(spectrum));

            // Same number of peptides, but the protein counts per peptide are swapped.
            converter.SetProperty(spectrum, "44444/55555 ! 66666");

            var firstProteins = spectrum.Peptides[0].Proteins;
            Assert.AreEqual(2, firstProteins.Count);
            Assert.AreEqual("44444", firstProteins[0]);
            Assert.AreEqual("55555", firstProteins[1]);

            var secondProteins = spectrum.Peptides[1].Proteins;
            Assert.AreEqual(1, secondProteins.Count);
            Assert.AreEqual("66666", secondProteins[0]);
        }
        /// <summary>
        /// Reads a tab-delimited pNovo result file and returns one identified spectrum for
        /// every candidate peptide that passes the rank and score filters.
        /// </summary>
        /// <remarks>
        /// Lines with at most five columns are treated as spectrum headers (file/scan name,
        /// experimental MH, charge, ..., candidate count); lines with more columns are
        /// candidate peptides belonging to the most recent header. Blank lines are skipped.
        /// </remarks>
        /// <param name="filename">pNovo result file</param>
        /// <param name="maxRank">Maximum rank of a candidate peptide within its spectrum; higher ranks are ignored</param>
        /// <param name="minScore">Minimum score of a candidate peptide; lower scores are ignored</param>
        /// <returns>The accepted candidates, each as its own spectrum with a single peptide</returns>
        public List <IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank, double minScore)
        {
            var result = new List <IIdentifiedSpectrum>();

            // Values of the most recent spectrum header, applied to the candidate lines below it.
            SequestFilename sf = null;

            int    charge = 2;
            double expmh  = 0;

            using (var sr = new StreamReader(filename))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // A blank line (e.g. at the end of the file) would otherwise reach
                    // Convert.ToInt32("") below and abort the whole read.
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    var parts = line.Split('\t');
                    if (parts.Length <= 5)
                    { //spectrum information
                        var seqcount = Convert.ToInt32(parts.Last());
                        if (seqcount == 0)
                        {
                            // Header without candidates: nothing follows, keep the previous state.
                            continue;
                        }

                        sf     = parser.GetValue(parts[0]);
                        expmh  = MyConvert.ToDouble(parts[1]);
                        charge = Convert.ToInt32(parts[2]);
                        continue;
                    }

                    // Candidate line: column 0 is the rank, column 2 the score.
                    int curIndex = Convert.ToInt32(parts[0]);
                    if (curIndex > maxRank)
                    {
                        continue;
                    }

                    var score = MyConvert.ToDouble(parts[2]);
                    if (score < minScore)
                    {
                        continue;
                    }

                    var curSpectrum = new IdentifiedSpectrum();
                    curSpectrum.Query.FileScan = sf;
                    curSpectrum.Query.Charge   = charge;
                    curSpectrum.ExperimentalMH = expmh;
                    curSpectrum.Score          = score;
                    result.Add(curSpectrum);

                    // Column 9 holds the sequence, column 11 the theoretical MH.
                    IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
                    pep.Sequence = ModifySequence(parts[9]);
                    pep.Spectrum.TheoreticalMH = MyConvert.ToDouble(parts[11]);
                    pep.Spectrum.Rank          = curIndex;
                }
            }
            return(result);
        }
Пример #12
0
    /// <summary>
    /// Setting <c>Sequence</c> keeps the text as given, while <c>PureSequence</c> exposes
    /// only the residues (no flanking residues, no modification character).
    /// </summary>
    public void TestSetSequence()
    {
      var peptide = new IdentifiedPeptide(new IdentifiedSpectrum());

      // With and without flanking residues; both must reduce to the same pure sequence.
      var inputs = new[] { "-.MAS*ESETLNPSAR.I", "MAS*ESETLNPSAR" };
      foreach (var input in inputs)
      {
        peptide.Sequence = input;
        Assert.AreEqual(input, peptide.Sequence);
        Assert.AreEqual("MASESETLNPSAR", peptide.PureSequence);
      }
    }
Пример #13
0
    /// <summary>
    /// <c>AddProtein</c> and <c>SetProtein</c> must store a protein name with its tab
    /// replaced by a space.
    /// </summary>
    public void TestAddProtein()
    {
      var peptide = new IdentifiedPeptide(new IdentifiedSpectrum());

      // Adding a name containing a tab yields exactly one entry with a space instead.
      peptide.AddProtein("AAAAA\tBBBBB");
      Assert.AreEqual(1, peptide.Proteins.Count);
      Assert.AreEqual("AAAAA BBBBB", peptide.Proteins[0]);

      // Replacing entry 0 keeps the count and applies the same substitution.
      peptide.SetProtein(0, "CCCCC\tDDDDD");
      Assert.AreEqual(1, peptide.Proteins.Count);
      Assert.AreEqual("CCCCC DDDDD", peptide.Proteins[0]);
    }
    /// <summary>
    /// The sequence string of a spectrum with two peptides joins their sequences with " ! ".
    /// </summary>
    public void TestGetSequenceString()
    {
      var spectrum = new IdentifiedSpectrum();

      var first = new IdentifiedPeptide(spectrum);
      var second = new IdentifiedPeptide(spectrum);
      first.Sequence = "P1";
      second.Sequence = "P2";

      var actual = MascotPeptideHitTextWriter.GetSequenceString(spectrum);

      Assert.AreEqual("P1 ! P2", actual);
    }
        /// <summary>
        /// Checks that a tab inside a protein name is stored as a space, both when the name
        /// is added and when an existing entry is overwritten.
        /// </summary>
        public void TestAddProtein()
        {
            var spectrum = new IdentifiedSpectrum();
            var peptide  = new IdentifiedPeptide(spectrum);

            // Add: one entry, tab turned into a space.
            peptide.AddProtein("AAAAA\tBBBBB");
            Assert.AreEqual(1, peptide.Proteins.Count);
            Assert.AreEqual("AAAAA BBBBB", peptide.Proteins[0]);

            // Overwrite index 0: still one entry, same substitution.
            peptide.SetProtein(0, "CCCCC\tDDDDD");
            Assert.AreEqual(1, peptide.Proteins.Count);
            Assert.AreEqual("CCCCC DDDDD", peptide.Proteins[0]);
        }
        /// <summary>
        /// <c>Sequence</c> must return exactly what was assigned; <c>PureSequence</c> must
        /// return the bare residues for input with and without flanking residues.
        /// </summary>
        public void TestSetSequence()
        {
            const string withFlanks    = "-.MAS*ESETLNPSAR.I";
            const string withoutFlanks = "MAS*ESETLNPSAR";
            const string residuesOnly  = "MASESETLNPSAR";

            var spectrum = new IdentifiedSpectrum();
            var peptide  = new IdentifiedPeptide(spectrum);

            peptide.Sequence = withFlanks;
            Assert.AreEqual(withFlanks, peptide.Sequence);
            Assert.AreEqual(residuesOnly, peptide.PureSequence);

            peptide.Sequence = withoutFlanks;
            Assert.AreEqual(withoutFlanks, peptide.Sequence);
            Assert.AreEqual(residuesOnly, peptide.PureSequence);
        }
    /// <summary>
    /// The protein string joins the proteins of one peptide with "/" and the peptides of a
    /// spectrum with " ! ".
    /// </summary>
    public void TestGetProteinString()
    {
      var spectrum = new IdentifiedSpectrum();

      // One protein on the first peptide, two on the second.
      var first = new IdentifiedPeptide(spectrum);
      first.AddProtein("P1");

      var second = new IdentifiedPeptide(spectrum);
      second.AddProtein("P2");
      second.AddProtein("P3");

      var actual = MascotPeptideHitTextWriter.GetProteinString(spectrum);

      Assert.AreEqual("P1 ! P2/P3", actual);
    }
Пример #18
0
        /// <summary>
        /// <c>MascotUtils.BuildProteins</c> must turn two spectra whose peptides reference
        /// Protein1/Protein2 and Protein1/Protein3 into three proteins with the right peptides.
        /// </summary>
        public void TestBuildProteins()
        {
            // Spectrum from EXP1: peptide belongs to Protein1 and Protein2.
            var firstSpectrum = new IdentifiedSpectrum();
            firstSpectrum.Query.FileScan.Experimental = "EXP1";
            var firstPeptide = new IdentifiedPeptide(firstSpectrum);
            firstPeptide.AddProtein("Protein1");
            firstPeptide.AddProtein("Protein2");

            // Spectrum from EXP2: peptide belongs to Protein1 and Protein3.
            var secondSpectrum = new IdentifiedSpectrum();
            secondSpectrum.Query.FileScan.Experimental = "EXP2";
            var secondPeptide = new IdentifiedPeptide(secondSpectrum);
            secondPeptide.AddProtein("Protein1");
            secondPeptide.AddProtein("Protein3");

            var spectra = new List <IIdentifiedSpectrum> { firstSpectrum, secondSpectrum };

            List <IIdentifiedProtein> proteins = MascotUtils.BuildProteins(spectra);

            Assert.AreEqual(3, proteins.Count);

            foreach (IdentifiedProtein protein in proteins)
            {
                if (protein.Name.Equals("Protein1"))
                {
                    // Shared by both peptides.
                    Assert.AreEqual(2, protein.Peptides.Count);
                }
                else if (protein.Name.Equals("Protein2"))
                {
                    Assert.AreEqual(1, protein.Peptides.Count);
                    Assert.AreEqual(firstPeptide, protein.Peptides[0]);
                }
                else if (protein.Name.Equals("Protein3"))
                {
                    Assert.AreEqual(1, protein.Peptides.Count);
                    Assert.AreEqual(secondPeptide, protein.Peptides[0]);
                }
            }
        }
        /// <summary>
        /// Two peptides on one spectrum with sequences "P1" and "P2" must be reported as
        /// "P1 ! P2".
        /// </summary>
        public void TestGetSequenceString()
        {
            var spectrum = new IdentifiedSpectrum();

            var firstPeptide = new IdentifiedPeptide(spectrum);
            firstPeptide.Sequence = "P1";

            var secondPeptide = new IdentifiedPeptide(spectrum);
            secondPeptide.Sequence = "P2";

            var sequenceString = MascotPeptideHitTextWriter.GetSequenceString(spectrum);
            Assert.AreEqual("P1 ! P2", sequenceString);
        }
Пример #20
0
    /// <summary>
    /// Prepares the fixture: two reference sequences plus six peptides (identical match,
    /// single mutation, and the miss-cleavage and "npt" variants of both).
    /// </summary>
    public void SetUp()
    {
      seqs = new[]
      {
        new Sequence("Test1", "ADFADJLFASRDLFKJWONNCKAOIWJEFLNC"),
        new Sequence("Test2", "WEUOIRJKJNCJKLSDTORWELSDJF")
      }.ToList();

      identical = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "JWONNCK"
      };
      singlemutation = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "LSDAOR"
      };

      misscleavageIdentical = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "LFKJWONNCK"
      };
      misscleavageSingleMutation = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "JKLSDAOR"
      };

      nptIdentical = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "JWONNC"
      };
      nptSingleMutation = new IdentifiedPeptide(new IdentifiedSpectrum())
      {
        Sequence = "LSDAO"
      };

      // NOTE(review): no validator is created here; the construction below was already disabled.
      //validator = new SnpDataValidator(null, null, null, new Protease("Trypsin", true, "KR", "P"));
    }
        /// <summary>
        /// A spectrum whose peptides reference P1 and P2+P3 must produce the protein string
        /// "P1 ! P2/P3".
        /// </summary>
        public void TestGetProteinString()
        {
            var spectrum = new IdentifiedSpectrum();

            var singleProteinPeptide = new IdentifiedPeptide(spectrum);
            singleProteinPeptide.AddProtein("P1");

            var twoProteinPeptide = new IdentifiedPeptide(spectrum);
            twoProteinPeptide.AddProtein("P2");
            twoProteinPeptide.AddProtein("P3");

            var proteinString = MascotPeptideHitTextWriter.GetProteinString(spectrum);
            Assert.AreEqual("P1 ! P2/P3", proteinString);
        }
Пример #22
0
    /// <summary>
    /// Builds proteins from two spectra and checks that three proteins result, each holding
    /// the peptides that reference it.
    /// </summary>
    public void TestBuildProteins()
    {
      var spectrumExp1 = new IdentifiedSpectrum();
      spectrumExp1.Query.FileScan.Experimental = "EXP1";
      var peptideExp1 = new IdentifiedPeptide(spectrumExp1);
      peptideExp1.AddProtein("Protein1");
      peptideExp1.AddProtein("Protein2");

      var spectrumExp2 = new IdentifiedSpectrum();
      spectrumExp2.Query.FileScan.Experimental = "EXP2";
      var peptideExp2 = new IdentifiedPeptide(spectrumExp2);
      peptideExp2.AddProtein("Protein1");
      peptideExp2.AddProtein("Protein3");

      var spectra = new List<IIdentifiedSpectrum>();
      spectra.AddRange(new IIdentifiedSpectrum[] { spectrumExp1, spectrumExp2 });

      List<IIdentifiedProtein> built = MascotUtils.BuildProteins(spectra);

      Assert.AreEqual(3, built.Count);

      foreach (IdentifiedProtein candidate in built)
      {
        var name = candidate.Name;

        // Protein1 is referenced by both peptides.
        if (name.Equals("Protein1"))
        {
          Assert.AreEqual(2, candidate.Peptides.Count);
          continue;
        }

        // Protein2 only by the EXP1 peptide.
        if (name.Equals("Protein2"))
        {
          Assert.AreEqual(1, candidate.Peptides.Count);
          Assert.AreEqual(peptideExp1, candidate.Peptides[0]);
          continue;
        }

        // Protein3 only by the EXP2 peptide.
        if (name.Equals("Protein3"))
        {
          Assert.AreEqual(1, candidate.Peptides.Count);
          Assert.AreEqual(peptideExp2, candidate.Peptides[0]);
          continue;
        }
      }
    }
Пример #23
0
        /// <summary>
        /// Parses one peptide line of an .out file into <paramref name="entry"/>, replacing
        /// its peptides with a single peptide built from the line.
        /// </summary>
        /// <remarks>
        /// Example line:
        /// <c>1.   1 /  1          0 1964.9940  0.0000  5.6970  2133.9  21/30  sw|P02666|CASBBOVIN   +1  K.FQSEEQQQTEDELQDK.I</c>
        /// Every '/' is turned into a space before splitting, so pairs such as "1 / 1" and
        /// "21/30" become separate fields. Field positions come from <c>itemIndex</c>.
        /// </remarks>
        /// <param name="line">Raw line of the .out file.</param>
        /// <param name="entry">Spectrum that receives the parsed values.</param>
        /// <returns>
        /// <c>false</c> (with <paramref name="entry"/> untouched) when the line has fewer
        /// fields than <c>itemIndex.MinCount</c>; otherwise <c>true</c>.
        /// </returns>
        //  1.   1 /  1          0 1964.9940  0.0000  5.6970  2133.9  21/30  sw|P02666|CASBBOVIN   +1  K.FQSEEQQQTEDELQDK.I
        protected bool ParseFromOutfileLine(string line, IdentifiedSpectrum entry)
        {
            string[] fields = this.reg.Split(line.Trim().Replace('/', ' '));
            if (fields.Length < itemIndex.MinCount)
            {
                return(false);
            }

            entry.Rank                = int.Parse(fields[itemIndex.RankIndex]);
            entry.SpRank              = int.Parse(fields[itemIndex.SpRankIndex]);
            entry.TheoreticalMH       = MyConvert.ToDouble(fields[itemIndex.TheoreticalMHIndex]);
            entry.DeltaScore          = MyConvert.ToDouble(fields[itemIndex.DeltaScoreIndex]);
            entry.Score               = MyConvert.ToDouble(fields[itemIndex.ScoreIndex]);
            entry.SpScore             = MyConvert.ToDouble(fields[itemIndex.SpScoreIndex]);
            entry.MatchedIonCount     = int.Parse(fields[itemIndex.MatchedIonCountIndex]);
            entry.TheoreticalIonCount = int.Parse(fields[itemIndex.TheoreticalIonCountIndex]);

            entry.ClearPeptides();

            // The field at SequenceIndex is either the sequence itself or a "+N" marker
            // (N is stored as DuplicatedCount) followed by the sequence in the next field.
            string sequenceField = fields[itemIndex.SequenceIndex];
            string sequence;
            if (sequenceField[0] == '+')
            {
                entry.DuplicatedCount = int.Parse(sequenceField.Substring(1));
                sequence = fields[itemIndex.SequenceIndex + 1];
            }
            else
            {
                entry.DuplicatedCount = 0;
                sequence = sequenceField;
            }

            CheckSequenceValid(ref sequence);

            // The constructor ties the peptide to entry; no explicit add is performed here.
            var peptide = new IdentifiedPeptide(entry);
            peptide.Sequence = sequence;
            peptide.AddProtein(fields[itemIndex.ProteinIndex]);

            return(true);
        }
    /// <summary>
    /// Round-trips peptide sequences through <c>IdentifiedSpectrumSequenceConverter</c>,
    /// including a set with more entries than the spectrum currently has peptides.
    /// </summary>
    public void Test()
    {
      IPropertyConverter<IdentifiedSpectrum> converter = new IdentifiedSpectrumSequenceConverter<IdentifiedSpectrum>();
      var spectrum = new IdentifiedSpectrum();

      var first = new IdentifiedPeptide(spectrum);
      first.Sequence = "AAAAA";

      var second = new IdentifiedPeptide(spectrum);
      second.Sequence = "BBBBB";

      Assert.AreEqual("Sequence", converter.Name);
      Assert.AreEqual("AAAAA ! BBBBB", converter.GetProperty(spectrum));

      // Three sequences for a spectrum that holds two peptides.
      converter.SetProperty(spectrum, "CCCCC ! DDDDD ! EEEEE");

      var expectedSequences = new[] { "CCCCC", "DDDDD", "EEEEE" };
      Assert.AreEqual(3, spectrum.Peptides.Count);
      for (int index = 0; index < expectedSequences.Length; index++)
      {
        Assert.AreEqual(expectedSequences[index], spectrum.Peptides[index].Sequence);
      }
    }
Пример #25
0
        /// <summary>
        /// Creates the protein fixture: Protein1 holds both peptides, Protein2 only the EXP1
        /// peptide (SEQ1) and Protein3 only the EXP2 peptide (SEQ2).
        /// </summary>
        /// <returns>A list ordered Protein3, Protein2, Protein1.</returns>
        private static List <IIdentifiedProtein> InitProteins()
        {
            var spectrumExp1 = new IdentifiedSpectrum();
            spectrumExp1.Query.FileScan.Experimental = "EXP1";

            var peptideSeq1 = new IdentifiedPeptide(spectrumExp1);
            peptideSeq1.AddProtein("Protein1");
            peptideSeq1.AddProtein("Protein2");
            peptideSeq1.Sequence = "SEQ1";

            var spectrumExp2 = new IdentifiedSpectrum();
            spectrumExp2.Query.FileScan.Experimental = "EXP2";

            var peptideSeq2 = new IdentifiedPeptide(spectrumExp2);
            peptideSeq2.AddProtein("Protein1");
            peptideSeq2.AddProtein("Protein3");
            peptideSeq2.Sequence = "SEQ2";

            var sharedProtein = new IdentifiedProtein("Protein1");
            sharedProtein.Peptides.Add(peptideSeq1);
            sharedProtein.Peptides.Add(peptideSeq2);

            var exp1Protein = new IdentifiedProtein("Protein2");
            exp1Protein.Peptides.Add(peptideSeq1);

            var exp2Protein = new IdentifiedProtein("Protein3");
            exp2Protein.Peptides.Add(peptideSeq2);

            // Reverse name order on purpose: Protein3 first.
            var proteins = new List <IIdentifiedProtein>();
            proteins.AddRange(new IIdentifiedProtein[] { exp2Protein, exp1Protein, sharedProtein });

            return(proteins);
        }
Пример #26
0
        /// <summary>
        /// Fills <paramref name="entry"/> from the already split fields of one .out peptide
        /// line that carries an id column, replacing its peptides with a single peptide.
        /// </summary>
        /// <remarks>
        /// Field layout used here: 1 rank, 2 Sp rank, 4 theoretical MH, 5 delta score,
        /// 6 score, 7 Sp score, 8 matched ions, 9 theoretical ions, 10 protein,
        /// 11 sequence or a "+N" marker, 12 sequence when field 11 is a marker.
        /// Fields 0 and 3 are not read; earlier code that parsed them into Index and Id
        /// was disabled.
        /// </remarks>
        /// <param name="sLines">Fields of the line.</param>
        /// <param name="entry">Spectrum that receives the parsed values.</param>
        /// <returns>
        /// <c>false</c>, leaving <paramref name="entry"/> untouched, when the line does not
        /// hold all required fields: fewer than 12 fields, an empty field 11, or a "+N"
        /// marker in field 11 without a following sequence field. Otherwise <c>true</c>.
        /// </returns>
        protected bool ParseFromOutfileLineWithId(List <string> sLines, IdentifiedSpectrum entry)
        {
            if (sLines.Count < 12)
            {
                return(false);
            }

            string sequenceField = sLines[11];
            if (string.IsNullOrEmpty(sequenceField))
            {
                return(false);
            }

            // A "+N" marker pushes the sequence to field 12, so 13 fields are needed; this
            // is checked before entry is modified so a short line never leaves it half-updated.
            bool hasDuplicateMarker = sequenceField[0] == '+';
            if (hasDuplicateMarker && sLines.Count < 13)
            {
                return(false);
            }

            entry.Rank                = int.Parse(sLines[1]);
            entry.SpRank              = int.Parse(sLines[2]);
            entry.TheoreticalMH       = MyConvert.ToDouble(sLines[4]);
            entry.DeltaScore          = MyConvert.ToDouble(sLines[5]);
            entry.Score               = MyConvert.ToDouble(sLines[6]);
            entry.SpScore             = MyConvert.ToDouble(sLines[7]);
            entry.MatchedIonCount     = int.Parse(sLines[8]);
            entry.TheoreticalIonCount = int.Parse(sLines[9]);

            entry.ClearPeptides();
            string sequence;

            if (hasDuplicateMarker)
            {
                // The digits after '+' are stored as the duplicated count.
                entry.DuplicatedCount = int.Parse(sequenceField.Substring(1));
                sequence = sLines[12];
            }
            else
            {
                entry.DuplicatedCount = 0;
                sequence = sequenceField;
            }

            CheckSequenceValid(ref sequence);

            // The constructor ties the peptide to entry; no explicit add is performed here.
            var sp = new IdentifiedPeptide(entry);

            sp.Sequence = sequence;
            sp.AddProtein(sLines[10]);

            return(true);
        }
Пример #27
0
        /// <summary>
        /// Checks name, reading and writing of <c>IdentifiedSpectrumSequenceConverter</c>:
        /// two peptides are read as "AAAAA ! BBBBB", and writing three sequences leaves
        /// three peptides with those sequences.
        /// </summary>
        public void Test()
        {
            IPropertyConverter <IdentifiedSpectrum> sequenceConverter = new IdentifiedSpectrumSequenceConverter <IdentifiedSpectrum>();
            var spectrum = new IdentifiedSpectrum();

            var peptideA = new IdentifiedPeptide(spectrum);
            peptideA.Sequence = "AAAAA";

            var peptideB = new IdentifiedPeptide(spectrum);
            peptideB.Sequence = "BBBBB";

            Assert.AreEqual("Sequence", sequenceConverter.Name);

            var written = sequenceConverter.GetProperty(spectrum);
            Assert.AreEqual("AAAAA ! BBBBB", written);

            // One more sequence than the spectrum has peptides.
            sequenceConverter.SetProperty(spectrum, "CCCCC ! DDDDD ! EEEEE");

            var peptides = spectrum.Peptides;
            Assert.AreEqual(3, peptides.Count);
            Assert.AreEqual("CCCCC", peptides[0].Sequence);
            Assert.AreEqual("DDDDD", peptides[1].Sequence);
            Assert.AreEqual("EEEEE", peptides[2].Sequence);
        }
Пример #28
0
    /// <summary>
    /// Adds four peptides one by one to a 30-residue protein and checks the coverage
    /// percentage after each addition.
    /// </summary>
    public void TestCalculateCoverage()
    {
      var protein = new IdentifiedProtein();
      // 30 amino acids in total.
      protein.Sequence = "ABCDEDFDEFDSESLKJFDJLSLGFGDDGD";

      // Peptides in "prev.SEQ.next" form and the coverage expected once each is added:
      // 5/30, 10/30, 14/30, and 15/30 (the last peptide extends the third by one residue).
      var peptideSequences = new[] { "B.CDEDF.D", "F.DSESL.K", "L.SLGF.G", "L.SLGFG.D" };
      var expectedCoverage = new[] { 16.67, 33.33, 46.67, 50.00 };

      for (int step = 0; step < peptideSequences.Length; step++)
      {
        var spectrum = new IdentifiedSpectrum();
        var peptide = new IdentifiedPeptide(spectrum);
        peptide.Sequence = peptideSequences[step];
        protein.Peptides.Add(peptide);

        protein.CalculateCoverage();
        Assert.AreEqual(expectedCoverage[step], protein.Coverage, 0.01);
      }
    }
        /// <summary>
        /// Loads an XML file of <c>fragSpectrumScan</c> elements and converts each contained
        /// <c>peptideSpectrumMatch</c> into an identified spectrum with a single peptide.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>One spectrum per peptide-spectrum match, in document order.</returns>
        /// <remarks>
        /// All numbers are parsed with the invariant culture: the file is machine-written
        /// with '.' as decimal separator, so parsing must not depend on the locale of the
        /// machine that reads it.
        /// </remarks>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result       = new List <IIdentifiedSpectrum>();
            XElement root         = XElement.Load(fileName);
            var      features     = root.FindElement("featureDescriptions");
            var      descriptions = features.FindElements("featureDescription");

            // Position of the missed-cleavage feature within each match's feature list.
            var missIndex = FindIndex(fileName, descriptions, "# Missed Cleavages");

            var scans = root.FindElements("fragSpectrumScan");

            foreach (var scan in scans)
            {
                var scanNumber = int.Parse(scan.FindAttribute("scanNumber").Value, invariant);
                var psms       = scan.FindElements("peptideSpectrumMatch");
                foreach (var psm in psms)
                {
                    IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                    spec.Query.QueryId  = scanNumber;
                    spec.Id             = psm.FindAttribute("id").Value.StringAfter("decoy_");
                    spec.FromDecoy      = psm.FindAttribute("isDecoy").Value.Equals("true");
                    // NOTE(review): the attributes are named "...MassToCharge" but are stored in
                    // the MH properties unchanged - confirm that no charge conversion is needed.
                    spec.TheoreticalMH  = double.Parse(psm.FindAttribute("calculatedMassToCharge").Value, invariant);
                    spec.ExperimentalMH = double.Parse(psm.FindAttribute("experimentalMassToCharge").Value, invariant);
                    spec.Query.Charge   = int.Parse(psm.FindAttribute("chargeState").Value, invariant);

                    // Constructing the peptide with the spectrum links the two; no explicit add is done here.
                    var pep = new IdentifiedPeptide(spec);
                    pep.Sequence = psm.FindElement("peptide").FindElement("peptideSequence").Value;
                    pep.AddProtein(psm.FindElement("occurence").FindAttribute("proteinId").Value);

                    var featureEles = psm.FindElement("features").FindElements("feature");
                    //The first one is the score.
                    spec.Score = double.Parse(featureEles[0].Value, invariant);
                    spec.NumMissedCleavages = int.Parse(featureEles[missIndex].Value, invariant);
                    result.Add(spec);
                }
            }

            return(result);
        }
    /// <summary>
    /// Builds a converter from a tab-delimited header line, then checks that a spectrum
    /// with two peptides is written as the expected line and that setting a different
    /// line is reproduced unchanged when read back.
    /// </summary>
    public void TestNoredundant()
    {
      string header = "\t\"File, Scan(s)\"\tSequence\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tDeltaScore\tExpectValue\tQuery\tIons\tReference\tDIFF_MODIFIED_CANDIDATE\tPI\tMissCleavage\tModification";
      IPropertyConverter<IIdentifiedSpectrum> lineConverter = IdentifiedSpectrumPropertyConverterFactory.GetInstance().GetConverters(header, '\t');

      // The combined converter reports the header itself as its name.
      Assert.AreEqual(header, lineConverter.Name);

      IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();
      spectrum.Query.FileScan.ShortFileName = "AAA,1-2";

      // First peptide with two proteins, second with one.
      IdentifiedPeptide firstPeptide = new IdentifiedPeptide(spectrum);
      firstPeptide.Sequence = "AAAAA";
      firstPeptide.AddProtein("PROTEIN1");
      firstPeptide.AddProtein("PROTEIN2");

      IdentifiedPeptide secondPeptide = new IdentifiedPeptide(spectrum);
      secondPeptide.Sequence = "BBBBB";
      secondPeptide.AddProtein("PROTEIN3");

      spectrum.TheoreticalMH = 1000.00102;
      spectrum.ExperimentalMH = 1000.0;
      spectrum.Query.Charge = 2;
      spectrum.Rank = 1;
      spectrum.Score = 100.2;
      spectrum.DeltaScore = 0.5;
      spectrum.ExpectValue = 1.1e-2;
      spectrum.Query.QueryId = 10;
      spectrum.NumMissedCleavages = 1;
      spectrum.Modifications = "O18(1)";

      // Expected line for the values above (columns are separated by literal tabs).
      string expectedLine = "	AAA,1 - 2	AAAAA ! BBBBB	1000.00102	0.00102	2	1	100.2	0.5	1.10E-002	10	0|0	PROTEIN1/PROTEIN2 ! PROTEIN3		0.00	1	O18(1)";
      Assert.AreEqual(expectedLine, lineConverter.GetProperty(spectrum));

      // Overwrite every column with new values, now with a single peptide, and read it back.
      string replacementLine = "	BBB,2 - 3	BBBBB	1002.00783	-0.00200	3	2	200.2	0.6	1.20E-003	20	0|0	PROTEIN2/PROTEIN4		0.00	2	O18(2)";
      lineConverter.SetProperty(spectrum, replacementLine);
      Assert.AreEqual(replacementLine, lineConverter.GetProperty(spectrum));
    }
    // Checks that the group builder drops proteins whose peptides are all covered by other
    // proteins and keeps only the two proteins that contribute peptides of their own.
    public void TestBuild()
    {
      var pep1 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("A", 1, 1, 1, ".dta"))) { Sequence = "A" };
      var pep2 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("B", 1, 1, 1, ".dta"))) { Sequence = "B" };
      var pep3 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("C", 1, 1, 1, ".dta"))) { Sequence = "C" };
      var pep4 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("D", 1, 1, 1, ".dta"))) { Sequence = "D" };
      var pep5 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("E", 1, 1, 1, ".dta"))) { Sequence = "E" };
      var pep6 = new IdentifiedPeptide(new IdentifiedSpectrum(new SequestFilename("F", 1, 1, 1, ".dta"))) { Sequence = "F" };

      var protein1 = new IdentifiedProtein()
      {
        Peptides = new IIdentifiedPeptide[] { pep1, pep3, pep5, pep6 }.ToList()
      };

      var protein2 = new IdentifiedProtein()
      {
        Peptides = new IIdentifiedPeptide[] { pep2, pep3, pep4 }.ToList()
      };

      // Should be removed from the final result: each of its peptides is already covered by
      // protein1 or protein2, even though neither of them contains both peptides.
      var protein3 = new IdentifiedProtein()
      {
        Peptides = new IIdentifiedPeptide[] { pep1, pep2 }.ToList()
      };

      // Should be removed from the final result: all of its peptides are included in protein1.
      var protein4 = new IdentifiedProtein()
      {
        Peptides = new IIdentifiedPeptide[] { pep1, pep5 }.ToList()
      };

      // protein4 was previously constructed but never handed to the builder, so the
      // "subset of a single protein" expectation above was not exercised at all.
      var actual = new IdentifiedProteinGroupBuilder().Build(new IIdentifiedProtein[] { protein1, protein2, protein3, protein4 }.ToList());
      Assert.AreEqual(2, actual.Count);
      Assert.AreSame(protein1, actual[0][0]);
      Assert.AreSame(protein2, actual[1][0]);
    }
Пример #32
0
        /// <summary>
        /// Reads an annotation file and builds one identified spectrum, carrying a single
        /// peptide, for every annotation entry.
        /// </summary>
        /// <param name="fileName">Annotation file to read</param>
        /// <returns>One spectrum per annotation entry, in file order</returns>
        public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // The numeric columns are data, not user-facing text: parse them with the invariant
            // culture so that "12.5" is not read as 125 on machines whose locale uses ',' as the
            // decimal separator. (Assumes the file is written with '.' decimals.)
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var result = new List<IIdentifiedSpectrum>();
            var anns   = new AnnotationFormat().ReadFromFile(fileName);

            foreach (var ann in anns)
            {
                // The peptide sequence is the part of PeptideId in front of the first '_'.
                var peptideId = ann.Annotations["PeptideId"] as string;
                var sequence  = peptideId.StringBefore("_");
                var spec      = new IdentifiedSpectrum();
                var pep       = new IdentifiedPeptide(spec);
                pep.Sequence = sequence;

                spec.Query.FileScan.Experimental  = ann.Annotations["Sample"] as string;
                spec.Query.FileScan.RetentionTime = double.Parse(ann.Annotations["PredictionRetentionTime"] as string, invariant);
                spec.Query.FileScan.Charge        = int.Parse(ann.Annotations["Charge"] as string);
                spec.IsPrecursorMonoisotopic      = true;

                var theoreticalMz = double.Parse(ann.Annotations["TheoreticalMz"] as string, invariant);
                spec.TheoreticalMH = PrecursorUtils.MzToMH(theoreticalMz, spec.Query.FileScan.Charge, true);

                result.Add(spec);
            }

            return(result);
        }
        /// <summary>
        /// Reads the "peptide" entries below the "peptides" element of an XML result file and
        /// builds one identified spectrum, carrying a single peptide, for each of them.
        /// </summary>
        /// <param name="fileName">XML file to read</param>
        /// <returns>One spectrum per peptide element, in document order</returns>
        public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // XML numbers always use '.' as the decimal separator, so they must not be parsed
            // with the machine's current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result   = new List<IIdentifiedSpectrum>();
            XElement root     = XElement.Load(fileName);
            var      peptides = root.FindElement("peptides").FindElements("peptide");

            foreach (var peptide in peptides)
            {
                IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                var pep = new IdentifiedPeptide(spec);

                pep.Sequence         = peptide.FindAttribute("peptide_id").Value;
                spec.FromDecoy       = peptide.FindAttribute("decoy").Value.Equals("true");
                spec.SpScore         = double.Parse(peptide.FindElement("svm_score").Value, invariant);
                spec.QValue          = double.Parse(peptide.FindElement("q_value").Value, invariant);
                // The "pep" element is stored as the spectrum score.
                spec.Score           = double.Parse(peptide.FindElement("pep").Value, invariant);
                spec.TheoreticalMass = double.Parse(peptide.FindElement("calc_mass").Value, invariant);
                pep.AddProtein(peptide.FindElement("protein_id").Value);
                spec.Probability = double.Parse(peptide.FindElement("p_value").Value, invariant);
                result.Add(spec);
            }

            return(result);
        }
    // Checks the reference converter: proteins of one peptide are joined with "/",
    // peptides are separated by " ! ", and SetProperty replaces the protein lists
    // of the existing peptides.
    public void Test()
    {
      IPropertyConverter<IdentifiedSpectrum> converter = new IdentifiedSpectrumReferenceConverter<IdentifiedSpectrum>();
      var spectrum = new IdentifiedSpectrum();

      var singleProteinPeptide = new IdentifiedPeptide(spectrum);
      singleProteinPeptide.AddProtein("11111");

      var twoProteinPeptide = new IdentifiedPeptide(spectrum);
      twoProteinPeptide.AddProtein("22222");
      twoProteinPeptide.AddProtein("33333");

      Assert.AreEqual("Reference", converter.Name);
      Assert.AreEqual("11111 ! 22222/33333", converter.GetProperty(spectrum));

      // Same number of peptides as before, but with different protein assignments.
      converter.SetProperty(spectrum, "44444/55555 ! 66666");

      var firstProteins = spectrum.Peptides[0].Proteins;
      Assert.AreEqual(2, firstProteins.Count);
      Assert.AreEqual("44444", firstProteins[0]);
      Assert.AreEqual("55555", firstProteins[1]);

      var secondProteins = spectrum.Peptides[1].Proteins;
      Assert.AreEqual(1, secondProteins.Count);
      Assert.AreEqual("66666", secondProteins[0]);
    }
Пример #35
0
        /// <summary>
        /// Reads the peptide identifications from an X!Tandem XML result file. Every
        /// "group" element of type "model" that yields at least one peptide becomes one spectrum.
        /// </summary>
        /// <param name="fileName">X!Tandem XML file name</param>
        /// <returns>List of IIdentifiedSpectrum, in document order</returns>
        public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            string sourceFilename = GetSourceFile(fileName);

            List<IIdentifiedSpectrum> result = new List<IIdentifiedSpectrum>();

            XmlDocument doc = new XmlDocument();

            doc.Load(fileName);

            this.xmlHelper = new XmlHelper(doc);

            XmlNode root = doc.DocumentElement;

            // Reduce the source file name to its stem: everything in front of the last ".raw"
            // (case-insensitive) if there is one, otherwise everything in front of the first dot.
            Match mSource = Regex.Match(sourceFilename, @"(.+)\.(?:RAW)", RegexOptions.IgnoreCase);
            if (!mSource.Success)
            {
                mSource = Regex.Match(sourceFilename, @"(.+?)\.");
            }
            if (mSource.Success)
            {
                sourceFilename = mSource.Groups[1].Value;
            }

            XmlNode parameters = xmlHelper.GetFirstChildByNameAndAttribute(root, "group", "label", "input parameters");

            ParseParameters(parameters);

            List<XmlNode> groupNodes = xmlHelper.GetChildrenByNameAndAttribute(root, "group", "type", "model");

            foreach (XmlNode groupNode in groupNodes)
            {
                // Peptides of this spectrum keyed by their flanked, modification-marked sequence,
                // so that several proteins sharing a peptide end up on the same peptide object.
                Dictionary<string, IIdentifiedPeptide> pepmap = new Dictionary<string, IIdentifiedPeptide>();

                IIdentifiedSpectrum spectrum = new IdentifiedSpectrum();

                List<XmlNode> proteins = xmlHelper.GetChildren(groupNode, "protein");

                foreach (XmlNode proteinNode in proteins)
                {
                    XmlNode domainNode = xmlHelper.GetValidChild(xmlHelper.GetValidChild(proteinNode, "peptide"), "domain");

                    // Parsed once here and reused below (the attribute used to be parsed twice).
                    int numMissedCleavages = int.Parse(domainNode.Attributes["missed_cleavages"].Value);

                    // "[" / "]" as flanking residue are replaced by "-".
                    string preSeq = domainNode.Attributes["pre"].Value;
                    if (preSeq.Equals("["))
                    {
                        preSeq = "-";
                    }

                    string postSeq = domainNode.Attributes["post"].Value;
                    if (postSeq.Equals("]"))
                    {
                        postSeq = "-";
                    }

                    StringBuilder pepSeqSB = new StringBuilder(domainNode.Attributes["seq"].Value);

                    int start = int.Parse(domainNode.Attributes["start"].Value);
                    int end   = int.Parse(domainNode.Attributes["end"].Value);

                    List<XmlNode> modifications = xmlHelper.GetChildren(domainNode, "aa");
                    if (modifications.Count > 0)
                    {
                        // Keep modifications located inside the peptide whose residue is not
                        // registered as a static modification.
                        List<ModificationItem> items = new List<ModificationItem>();
                        foreach (XmlNode modification in modifications)
                        {
                            int at = int.Parse(modification.Attributes["at"].Value);
                            if (at < start || at > end)
                            {
                                continue;
                            }

                            ModificationItem item = new ModificationItem();
                            item.Type     = modification.Attributes["type"].Value;
                            item.At       = at;
                            item.Modified = MyConvert.ToDouble(modification.Attributes["modified"].Value);
                            if (!staticModifications.ContainsKey(item.Type[0]))
                            {
                                items.Add(item);
                            }
                        }

                        spectrum.Modifications = "";
                        if (items.Count > 0)
                        {
                            // Modification text lists the items by ascending position.
                            items.Sort((m1, m2) => m1.At - m2.At);

                            StringBuilder modText = new StringBuilder();
                            foreach (ModificationItem item in items)
                            {
                                modText.Append(MyConvert.Format(",{0}({1:0.0000})", item.Type, item.Modified));
                            }
                            // Drop the leading comma.
                            spectrum.Modifications = modText.ToString().Substring(1);

                            // Insert the marker characters from the back of the sequence to the
                            // front so that earlier insert positions are not shifted.
                            items.Sort((m1, m2) => m2.At - m1.At);
                            foreach (ModificationItem item in items)
                            {
                                var key = GetModifiedKey(item.Modified);
                                if (!dynamicModificationChars.ContainsKey(key))
                                {
                                    AddDynamicModificationChar(key);
                                }
                                char modificationChar = dynamicModificationChars[key];
                                pepSeqSB.Insert(item.At - start + 1, modificationChar.ToString());
                            }
                        }
                    }

                    // <last residue of pre>.<marked sequence>.<first residue of post>
                    StringBuilder sb = new StringBuilder();
                    sb.Append(preSeq.Substring(preSeq.Length - 1));
                    sb.Append(".");
                    sb.Append(pepSeqSB.ToString());
                    sb.Append(".");
                    sb.Append(postSeq[0]);

                    string pepSeq = sb.ToString();

                    IIdentifiedPeptide pep;
                    if (!pepmap.TryGetValue(pepSeq, out pep))
                    {
                        pep            = new IdentifiedPeptide(spectrum);
                        pep.Sequence   = pepSeq;
                        pepmap[pepSeq] = pep;

                        spectrum.TheoreticalMH = MyConvert.ToDouble(domainNode.Attributes["mh"].Value);
                        spectrum.Score         = MyConvert.ToDouble(domainNode.Attributes["hyperscore"].Value);

                        double nextScore = MyConvert.ToDouble(domainNode.Attributes["nextscore"].Value);
                        spectrum.DeltaScore         = (spectrum.Score - nextScore) / spectrum.Score;
                        spectrum.NumMissedCleavages = numMissedCleavages;
                    }

                    // The protein name is the note text up to the first space or tab.
                    var    noteNode    = xmlHelper.GetValidChild(proteinNode, "note");
                    string proteinName = noteNode.InnerText.StringBefore(" ").StringBefore("\t");
                    pep.AddProtein(proteinName);
                }

                if (spectrum.Peptides.Count == 0)
                {
                    continue;
                }

                spectrum.DigestProtease = protease;
                result.Add(spectrum);

                spectrum.Query.QueryId  = int.Parse(groupNode.Attributes["id"].Value);
                spectrum.ExperimentalMH = MyConvert.ToDouble(groupNode.Attributes["mh"].Value);
                spectrum.ExpectValue    = MyConvert.ToDouble(groupNode.Attributes["expect"].Value);

                XmlNode spectrumNode = xmlHelper.GetFirstChildByNameAndAttribute(groupNode, "group", "label", "fragment ion mass spectrum");
                XmlNode labelNode    = xmlHelper.GetFirstChildByNameAndAttribute(spectrumNode, "note", "label", "Description");
                string  title        = labelNode.InnerText.Trim();
                if (title.StartsWith("RTINSECONDS"))
                {
                    // Description looks like "RTINSECONDS=<rt>[-<rt>] <title>"; take the first value.
                    // Parsed with the invariant culture: the value comes from a data file and must
                    // not depend on the machine's decimal separator.
                    var rtvalue = title.StringAfter("=").StringBefore(" ").StringBefore("-");
                    spectrum.Query.FileScan.RetentionTime = double.Parse(rtvalue, System.Globalization.CultureInfo.InvariantCulture);
                    title = title.StringAfter(" ").Trim();
                }

                SequestFilename sf = this.TitleParser.GetValue(title);
                if (string.IsNullOrEmpty(sf.Experimental))
                {
                    // Fall back to the stem of the source file when the title carries no experiment name.
                    sf.Experimental = sourceFilename;
                }
                spectrum.Query.FileScan.LongFileName = sf.LongFileName;
                if (sf.RetentionTime > 0 && spectrum.Query.FileScan.RetentionTime == 0)
                {
                    spectrum.Query.FileScan.RetentionTime = sf.RetentionTime;
                }

                spectrum.Query.Charge = int.Parse(groupNode.Attributes["z"].Value);
                spectrum.Query.Title  = title;
            }
            return(result);
        }
Пример #36
0
        /// <summary>
        /// Reads the candidate peptides of every spectrum from a pNovo result file.
        /// Lines starting with "S" open a new spectrum (the text after the first tab is its
        /// title); every other non-blank line is a tab-separated candidate with the sequence
        /// in the second column and the score in the third. Candidates that share the score of
        /// the previous accepted candidate are attached to the same spectrum entry.
        /// </summary>
        /// <param name="filename">pNovo result file</param>
        /// <param name="maxRank">Maximum rank of a candidate kept for one spectrum; the first candidate and score ties are always kept</param>
        /// <param name="minScore">Minimum score a candidate needs in order to be considered</param>
        /// <returns>One entry per spectrum and distinct score rank, in file order</returns>
        public List<IIdentifiedSpectrum> ParsePeptides(string filename, int maxRank = 10, double minScore = 0.0)
        {
            var result = new List<IIdentifiedSpectrum>();

            SequestFilename sf = null;

            // Rank of the current candidate within the current spectrum (1-based).
            int curIndex = 0;

            using (var sr = new StreamReader(filename))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    if (line.StartsWith("S"))
                    {
                        var title = line.StringAfter("\t");
                        sf       = this.parser.GetValue(title);
                        curIndex = 0;
                        continue;
                    }

                    var parts = line.Split('\t');
                    var score = MyConvert.ToDouble(parts[2]);
                    if (score < minScore)
                    {
                        continue;
                    }

                    curIndex++;

                    bool isFirstCandidate = curIndex == 1;

                    IIdentifiedSpectrum curSpectrum;
                    if (!isFirstCandidate && score == result.Last().Score)
                    {
                        // Same score as the previous accepted candidate: share its spectrum entry.
                        curSpectrum = result.Last();
                    }
                    else if (!isFirstCandidate && curIndex > maxRank)
                    {
                        continue;
                    }
                    else
                    {
                        if (sf == null)
                        {
                            // Previously this surfaced as a bare NullReferenceException.
                            throw new InvalidDataException(string.Format("Peptide line found before any spectrum line in file {0}", filename));
                        }

                        curSpectrum = new IdentifiedSpectrum();
                        curSpectrum.Query.FileScan = sf;
                        curSpectrum.Query.Charge   = sf.Charge;
                        curSpectrum.Score          = score;
                        curSpectrum.Rank           = curIndex;
                        result.Add(curSpectrum);
                    }

                    IdentifiedPeptide pep = new IdentifiedPeptide(curSpectrum);
                    pep.Sequence = ModifySequence(parts[1]);
                }
            }
            return(result);
        }
Пример #37
0
        /// <summary>
        /// Compares this target peptide with another one by peptide mass, peptide sequence,
        /// identified peptide and the complete set of modification entries.
        /// </summary>
        /// <param name="obj">Peptide to compare with; may be null</param>
        /// <returns>true when all compared members are equal, otherwise false</returns>
        public bool Equals(TargetPeptide obj)
        {
            // Equals must answer false for null instead of throwing.
            if (ReferenceEquals(obj, null))
            {
                return(false);
            }

            if (!PeptideMass.Equals(obj.PeptideMass) || !PeptideSequence.Equals(obj.PeptideSequence) || !Modifications.Count.Equals(obj.Modifications.Count) || !IdentifiedPeptide.Equals(obj.IdentifiedPeptide))
            {
                return(false);
            }

            foreach (string key in obj.Modifications.Keys)
            {
                // Equal counts do not guarantee equal keys: a key that is missing on this side
                // means "different", it must not end in a failed lookup.
                if (!Modifications.ContainsKey(key) || obj.Modifications[key] != Modifications[key])
                {
                    return(false);
                }
            }
            return(true);
        }
Пример #38
0
        /// <summary>
        /// Reads the spectrum identifications from an mzIdentML file: the search protocol
        /// (modifications, enzymes), the sequence collection (proteins, peptides, peptide
        /// evidences) and finally the identification results themselves.
        /// </summary>
        /// <param name="fileName">mzIdentML file to read</param>
        /// <returns>One spectrum per SpectrumIdentificationResult that keeps at least one item after filtering</returns>
        /// <exception cref="Exception">A kept result carries no scan number (first scan is 0)</exception>
        public List<IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // XML numbers always use '.' as the decimal separator; never parse them with the
            // machine's current culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            XElement root = XElement.Load(fileName);

            // Not used further below; the lookup is kept so that a file without a software
            // name entry keeps failing here as it did before.
            var name = root.FindElement("AnalysisSoftwareList").
                       FindElement("AnalysisSoftware").
                       FindElement("SoftwareName").
                       FindElement("cvParam").Attribute("name").Value;

            // Default experiment name: file name without its extension and without any of
            // the known secondary extensions.
            var defaultExp = Path.GetFileNameWithoutExtension(fileName);

            foreach (var ext in extensions)
            {
                // Invariant lower-casing plus ordinal comparison: file extensions are not
                // linguistic text (avoids e.g. the Turkish dotless-i mismatch).
                if (defaultExp.ToLowerInvariant().EndsWith(ext, StringComparison.Ordinal))
                {
                    defaultExp = defaultExp.Substring(0, defaultExp.Length - ext.Length);
                }
            }

            //parsing identification protocol first
            var protocols = root.FindElement("AnalysisProtocolCollection");
            var sip       = protocols.FindElement("SpectrumIdentificationProtocol");
            var modMap    = ParseSearchModificationMap(sip.FindElement("ModificationParams"));
            var proteases = ParseEnzymes(sip.FindElement("Enzymes"));
            var protease  = proteases.FirstOrDefault();

            //parsing sequence collection, including protein<->peptide map
            var seqs       = root.FindElement("SequenceCollection");
            var proteinMap = (from ele in seqs.FindElements("DBSequence")
                              let id = ele.Attribute("id").Value
                              let accession = ParseAccession(ele.Attribute("accession").Value)
                              let db = ele.Attribute("searchDatabase_ref").Value
                              select new { Id = id, Accession = accession, DB = db }).ToDictionary(m => m.Id);

            // Modifications are ordered by descending location.
            var peptideMap = (from ele in seqs.FindElements("Peptide")
                              let id = ele.Attribute("id").Value
                              let seq = ele.FindElement("PeptideSequence").Value
                              let mods = (from modEle in ele.FindElements("Modification")
                                          let mod = ParseModification(modEle, modMap)
                                          where mod != null
                                          orderby mod.Location descending
                                          select mod).ToArray()
                              let numMiss = protease == null ? 0 : protease.GetMissCleavageSiteCount(seq)
                              select new MzIdentPeptideItem()
                              {
                                  Id = id,
                                  PureSequence = seq,
                                  Modifications = mods,
                                  Sequence = GetModifiedSequence(seq, mods),
                                  NumMissCleavage = numMiss
                              }).ToDictionary(m => m.Id);

            // Evidence ids may repeat; only the first entry of each id is kept.
            var peptideEvidenceMap = (from g in
                                          (from ele in seqs.FindElements("PeptideEvidence")
                                           select new MzIdentPeptideEvidenceItem()
                                           {
                                               Id = ele.Attribute("id").Value,
                                               PeptideRef = ele.Attribute("peptide_ref").Value,
                                               DbRef = ele.Attribute("dBSequence_ref").Value,
                                               Pre = ele.Attribute("pre").Value,
                                               Post = ele.Attribute("post").Value
                                           }).GroupBy(m => m.Id)
                                      select g.First()).ToDictionary(m => m.Id);

            //now parsing data
            var data = root.FindElement("DataCollection");

            var result       = new List<IIdentifiedSpectrum>();
            var analysisData = data.FindElement("AnalysisData");
            var idList       = analysisData.FindElement("SpectrumIdentificationList");

            foreach (var sir in idList.FindElements("SpectrumIdentificationResult"))
            {
                var items = FilterItems(sir.FindElements("SpectrumIdentificationItem"), peptideMap, peptideEvidenceMap);

                if (items.Count == 0)
                {
                    continue;
                }

                var spectrum = new IdentifiedSpectrum();
                result.Add(spectrum);

                var spectrumId = sir.Attribute("spectrumID").Value;

                var    sirCvParams = GetCvParams(sir);
                string value;
                if (sirCvParams.TryGetValue("MS:1000796", out value))
                {
                    // cvParam MS:1000796 is handed to the title parser to obtain the file/scan.
                    spectrum.Query.FileScan = TitleParser.GetValue(value);
                }
                else
                {
                    if (spectrumId.StartsWith("index=") || spectrumId.StartsWith("scan="))
                    {
                        spectrum.Query.FileScan.Experimental = defaultExp;
                        spectrum.Query.FileScan.FirstScan    = int.Parse(spectrumId.StringAfter("="));
                        spectrum.Query.FileScan.LastScan     = spectrum.Query.FileScan.FirstScan;
                    }
                    else
                    {
                        spectrum.Query.FileScan.Experimental = spectrumId;
                    }
                }

                // cvParam MS:1001115, when present, overrides the first scan number.
                if (sirCvParams.TryGetValue("MS:1001115", out value))
                {
                    spectrum.Query.FileScan.FirstScan = int.Parse(value);
                }

                if (spectrum.Query.FileScan.FirstScan == 0)
                {
                    throw new Exception(string.Format("Cannot find scan information in file {0}", fileName));
                }

                bool bFirst = true;

                foreach (var sit in items)
                {
                    if (bFirst) //only parse score once
                    {
                        spectrum.Id     = sit.Attribute("id").Value;
                        spectrum.Charge = int.Parse(sit.Attribute("chargeState").Value);

                        var calculatedMz   = double.Parse(sit.Attribute("calculatedMassToCharge").Value, invariant);
                        var experimentalMz = double.Parse(sit.Attribute("experimentalMassToCharge").Value, invariant);
                        spectrum.TheoreticalMH  = PrecursorUtils.MzToMH(calculatedMz, spectrum.Charge, true);
                        spectrum.ExperimentalMH = PrecursorUtils.MzToMH(experimentalMz, spectrum.Charge, true);

                        var cvParams = GetCvParams(sit);
                        if (cvParams.TryGetValue("MS:1001121", out value))
                        {
                            spectrum.MatchedIonCount = int.Parse(value);
                        }

                        // MS:1001362 is added to the matched count to obtain the theoretical ion count.
                        if (cvParams.TryGetValue("MS:1001362", out value))
                        {
                            spectrum.TheoreticalIonCount = int.Parse(value) + spectrum.MatchedIonCount;
                        }

                        ParseScore(spectrum, cvParams);

                        var userParams = GetUserParams(sit);
                        ParseUserParams(spectrum, userParams);

                        bFirst = false;
                    }

                    var peptide = new IdentifiedPeptide(spectrum);
                    var pep_ref = sit.Attribute("peptide_ref").Value;
                    var pep     = peptideMap[pep_ref];

                    // Modifications are stored by descending location; Reverse() lists them ascending.
                    // Note: each item overwrites these spectrum-level values, so the last item wins.
                    spectrum.Modifications = (from m in pep.Modifications
                                              select string.Format("{0}:{1}", m.Location, m.Item.Name)).Reverse().Merge(",");
                    spectrum.NumMissedCleavages = pep.NumMissCleavage;

                    foreach (var per in sit.FindElements("PeptideEvidenceRef"))
                    {
                        var pe_ref = per.Attribute("peptideEvidence_ref").Value;
                        var pe     = peptideEvidenceMap[pe_ref];
                        peptide.Sequence = pe.Pre + "." + pep.Sequence + "." + pe.Post;

                        var protein = proteinMap[pe.DbRef];
                        peptide.AddProtein(protein.Accession);
                    }
                }
            }

            return(result);
        }
        public Dictionary <int, IIdentifiedPeptide> ParsePeptideMap(string fileName)
        {
            SQLiteDBHelper sqlite = new SQLiteDBHelper(fileName);

            Dictionary <int, IIdentifiedPeptide> result = new Dictionary <int, IIdentifiedPeptide>();

            var pniReader = sqlite.ExecuteReader("select distinct(ProcessingNodeID) from peptidescores", null);

            if (!pniReader.Read())
            {
                return(result);
            }
            var nodeid = pniReader.GetInt32(0);

            var pniScore = sqlite.ExecuteReader(string.Format("select scoreid from processingnodescores where processingnodeid={0} and ismainscore=1", nodeid), null);

            if (!pniScore.Read())
            {
                return(result);
            }
            var scoreid = pniScore.GetInt32(0);

            Dictionary <int, IIdentifiedSpectrum> spectra = ParseSpectrumMap(fileName);
            var aas = ParseAminoacids(fileName);

            //读取肽段列表
            string sqlPeptide    = string.Format("select pep.SpectrumID, pep.PeptideID, pep.TotalIonsCount, pep.MatchedIonsCount, pep.ConfidenceLevel, pep.Sequence, pep.MissedCleavages, ps.ScoreValue from Peptides as pep, PeptideScores as ps where pep.PeptideID=ps.PeptideID and ps.ScoreID={0} order by pep.SpectrumID, pep.SearchEngineRank", scoreid);
            var    peptideReader = sqlite.ExecuteReader(sqlPeptide, null);

            Progress.SetMessage("Parsing peptides ...");

            while (peptideReader.Read())
            {
                var specid = peptideReader.GetInt32(0);
                if (!spectra.ContainsKey(specid))
                {
                    continue;
                }

                var pepid          = peptideReader.GetInt32(1);
                var seq            = peptideReader.GetString(5);
                var missedCleavage = peptideReader.GetInt32(6);
                var score          = peptideReader.GetDouble(7);

                IIdentifiedSpectrum spectrum = spectra[specid];
                if (spectrum.Peptides.Count == 0)
                {
                    spectrum.TheoreticalIonCount = peptideReader.GetInt32(2);
                    spectrum.MatchedIonCount     = peptideReader.GetInt32(3);

                    IdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
                    peptide.ConfidenceLevel     = peptideReader.GetInt32(4);
                    peptide.Sequence            = seq;
                    spectrum.NumMissedCleavages = missedCleavage;

                    spectrum.Score           = score;
                    spectrum.TheoreticalMass = aas.MonoPeptideMass(peptide.Sequence);
                    spectrum.Rank            = 1;

                    spectrum.DeltaScore = 1.0;

                    result[pepid] = peptide;
                    continue;
                }
                else
                {
                    if (score == spectrum.Score)
                    {
                        IIdentifiedPeptide peptide = new IdentifiedPeptide(spectrum);
                        peptide.ConfidenceLevel = peptideReader.GetInt32(4);
                        peptide.Sequence        = seq;
                        result[pepid]           = peptide;
                        continue;
                    }

                    if (seq == spectrum.Peptide.Sequence)
                    {
                        continue;
                    }

                    spectrum.DeltaScore = (spectrum.Score - score) / spectrum.Score;
                }
            }

            //动态氨基酸修饰
            var    modMap        = ParseModifications(fileName);
            string sqlPeptideMod = "select PeptideID, AminoAcidModificationID, Position from PeptidesAminoacidModifications order by Position desc";
            var    pepModReader  = sqlite.ExecuteReader(sqlPeptideMod, null);

            Progress.SetMessage("Parsing peptide modifications ...");
            while (pepModReader.Read())
            {
                var pepid = pepModReader.GetInt32(0);
                if (!result.ContainsKey(pepid))
                {
                    continue;
                }

                var modid    = pepModReader.GetInt32(1);
                var position = pepModReader.GetInt32(2);

                var mod = modMap[modid];

                var peptide   = result[pepid];
                var aminoacid = peptide.Sequence[position];

                if (peptide.IsTopOne())
                {
                    var modStr = string.Format("{0} ({1})", mod.SignStr, aminoacid);
                    if (string.IsNullOrEmpty(peptide.Spectrum.Modifications))
                    {
                        peptide.Spectrum.Modifications = modStr;
                    }
                    else
                    {
                        peptide.Spectrum.Modifications = peptide.Spectrum.Modifications + "; " + modStr;
                    }
                    peptide.Spectrum.TheoreticalMass += mod.DeltaMass;
                }

                var modchar = mod.SignChar;
                var seq     = peptide.Sequence;
                peptide.Sequence = seq.Insert(position + 1, modchar.ToString());
            }

            //动态末端修饰
            string sqlTermMod    = "select PeptideID, TerminalModificationID from PeptidesTerminalModifications";
            var    termModReader = sqlite.ExecuteReader(sqlTermMod, null);

            Progress.SetMessage("Parsing terminal modifications ...");
            while (termModReader.Read())
            {
                var pepid = termModReader.GetInt32(0);
                if (result.ContainsKey(pepid))
                {
                    var modid   = termModReader.GetInt32(1);
                    var peptide = result[pepid];
                    var mod     = modMap[modid];

                    if (peptide.IsTopOne())
                    {
                        if (string.IsNullOrEmpty(peptide.Spectrum.Modifications))
                        {
                            peptide.Spectrum.Modifications = mod.SignStr;
                        }
                        else if (mod.PositionType == 1)
                        {
                            peptide.Spectrum.Modifications = mod.SignStr + "; " + peptide.Spectrum.Modifications;
                        }
                        else
                        {
                            peptide.Spectrum.Modifications = peptide.Spectrum.Modifications + "; " + mod.SignStr;
                        }
                        peptide.Spectrum.TheoreticalMass += mod.DeltaMass;
                    }

                    var modchar = mod.SignChar;
                    var seq     = peptide.Sequence;
                    if (mod.PositionType == 1)
                    {
                        seq = modchar.ToString() + seq;
                    }
                    else
                    {
                        seq = seq + modchar.ToString();
                    }
                    peptide.Sequence = seq;
                }
            }

            //其他Score
            var dcReader = sqlite.ExecuteReader("select ps.PeptideID, pns.ScoreName, ps.ScoreValue from PeptideScores as ps, ProcessingNodeScores as pns where ps.ScoreID=pns.ScoreID and pns.IsMainScore=0", null);

            while (dcReader.Read())
            {
                var pepid = dcReader.GetInt32(0);
                if (result.ContainsKey(pepid))
                {
                    var pep = result[pepid];
                    if (pep.IsTopOne())
                    {
                        var name  = dcReader.GetString(1);
                        var value = dcReader.GetDouble(2);
                        if (name.Equals("SpScore"))
                        {
                            pep.Spectrum.SpScore = value;
                        }
                        else if (name.Equals("ProbabilityScore"))
                        {
                            pep.Spectrum.Probability = value;
                        }
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// Reads peptide identifications from the first worksheet of an Excel workbook and
        /// groups them by the protein name found in column B.
        /// </summary>
        /// <remarks>
        /// Rows are scanned from row 2 until column B is empty. A row is treated as a peptide
        /// row only when both column A (sequence) and column I (delta score) are filled.
        /// The first time a protein name is seen, its coverage, molecular weight, isoelectric
        /// point, score and description are read from columns C, F, G, H and I of the row two
        /// below the current one.
        /// NOTE(review): the "+ 2" row offset is kept exactly as the original code had it;
        /// confirm it against a sample export.
        /// </remarks>
        /// <param name="fileName">Path of the workbook to open.</param>
        /// <returns>One protein per distinct column-B value, each carrying its peptides.</returns>
        public override List <IIdentifiedProtein> ParseProteins(string fileName)
        {
            Dictionary <string, IIdentifiedProtein> proteinMap = new Dictionary <string, IIdentifiedProtein>();

            Application xApp = new Application();
            try
            {
                // Open the existing workbook.
                Workbook xBook = xApp.Workbooks._Open(fileName,
                                                      Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                      , Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                      , Missing.Value, Missing.Value, Missing.Value, Missing.Value);

                try
                {
                    Worksheet xSheet = (Worksheet)xBook.Sheets[1];

                    int fromRow = 2;
                    int endRow  = fromRow;

                    // The sheet's row capacity does not change while scanning, so ask for it once
                    // instead of on every iteration.
                    int sheetRowCount = xSheet.Rows.Count;

                    // Find the last used row: the first empty cell in column B ends the data.
                    for (; endRow <= sheetRowCount; endRow++)
                    {
                        string b = xSheet.Value('B', endRow);
                        if (null == b)
                        {
                            break;
                        }
                    }
                    endRow--;

                    Progress.SetRange(fromRow, endRow);
                    Progress.SetMessage("Parsing file ...");
                    for (int i = fromRow; i <= endRow; i++)
                    {
                        Progress.SetPosition(i);

                        string seq = xSheet.Value('A', i);
                        if (null == seq) // protein information row, not a peptide
                        {
                            continue;
                        }

                        string deltaCn = xSheet.Value('I', i);
                        if (null == deltaCn) // rank > 1
                        {
                            continue;
                        }

                        string protein = xSheet.Value('B', i);
                        if (!proteinMap.ContainsKey(protein))
                        {
                            var p = new IdentifiedProtein(protein);

                            p.Coverage         = MyConvert.ToDouble(xSheet.Value('C', i + 2));
                            p.MolecularWeight  = MyConvert.ToDouble(xSheet.Value('F', i + 2)) * 1000;
                            p.IsoelectricPoint = MyConvert.ToDouble(xSheet.Value('G', i + 2));
                            p.Score            = MyConvert.ToDouble(xSheet.Value('H', i + 2));
                            p.Description      = xSheet.Value('I', i + 2);

                            proteinMap[protein] = p;
                        }

                        var pro = proteinMap[protein];

                        IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
                        IdentifiedPeptide  peptide  = new IdentifiedPeptide(spectrum);
                        peptide.Sequence = seq.ToUpper();
                        peptide.AddProtein(protein);
                        spectrum.Modifications               = xSheet.Value('F', i);
                        spectrum.DeltaScore                  = MyConvert.ToDouble(deltaCn);
                        spectrum.Charge                      = Convert.ToInt32(xSheet.Value('K', i));
                        spectrum.ObservedMz                  = MyConvert.ToDouble(xSheet.Value('L', i));
                        spectrum.TheoreticalMH               = MyConvert.ToDouble(xSheet.Value('M', i));
                        spectrum.Ions                        = xSheet.Value('S', i);
                        spectrum.Query.FileScan.FirstScan    = Convert.ToInt32(xSheet.Value('P', i));
                        spectrum.Query.FileScan.LastScan     = Convert.ToInt32(xSheet.Value('Q', i));
                        spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(xSheet.Value('T', i));

                        pro.Peptides.Add(peptide);
                    }
                }
                finally
                {
                    // Close without saving changes.
                    xBook.Close(false, Type.Missing, Type.Missing);
                }
            }
            finally
            {
                // Ask the Excel instance created above to exit; otherwise nothing ever tells it
                // to shut down, including when opening or parsing the workbook throws.
                xApp.Quit();
            }

            var proteins = proteinMap.Values.ToList();

            return(proteins);
        }
        /// <summary>
        /// Builds the query/peptide map from a sectioned search-result file.
        /// The file is read as a "Search" section, a "Total" section and then one
        /// "Spectrum{n}" section per query. Candidates inside a spectrum section are
        /// addressed by keys of the form "NO{k}_Score", "NO{k}_SQ", "NO{k}_Proteins", ...
        /// NOTE(review): the previous comment mentioned both a mascot dat file and a pFind
        /// proteins file; confirm which format this parser targets.
        /// </summary>
        /// <param name="filename">Result file to parse</param>
        /// <param name="minRank">Highest rank kept per spectrum: candidate parsing stops once the rank exceeds this value (despite the name, it is used as an upper bound)</param>
        /// <param name="minScore">Minimum score; the first candidate scoring below it, or scoring exactly 0, ends that spectrum's candidate list</param>
        /// <returns>
        /// Map from query id to its identified spectra. Queries whose "ValidCandidate" is 0 get no entry;
        /// other queries get an entry even if no candidate passes the filters (the list is then empty).
        /// </returns>
        /// <exception cref="UserTerminatedException">Thrown when the progress callback reports a pending cancellation</exception>
        public Dictionary <int, List <IIdentifiedSpectrum> > ParsePeptides(string filename, int minRank, double minScore)
        {
            var result = new Dictionary <int, List <IIdentifiedSpectrum> >();

            // Used further down as the fallback experiment name when the title parser yields none.
            var sourceDir = GetSourceFile(filename);

            using (var sr = new StreamReader(filename))
            {
                var parameters = ParseSection(sr, "Search");

                var mm = ParseModification(parameters);

                // Give every dynamic modification that has no sign character yet the next one
                // from MODIFICATION_CHAR, indexed by the current map size + 1.
                foreach (var mod in mm.DynamicModification)
                {
                    if (!this.ModificationCharMap.ContainsKey(mod.Modification))
                    {
                        this.ModificationCharMap[mod.Modification] = ModificationConsts.MODIFICATION_CHAR[this.ModificationCharMap.Count + 1];
                    }
                }

                var headers = ParseSection(sr, "Total");

                var queryCount = int.Parse(headers["Spectra"]);

                Progress.SetRange(1, queryCount);
                for (int queryId = 1; queryId <= queryCount; queryId++)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    Progress.SetPosition(queryId);

                    var speName        = MyConvert.Format("Spectrum{0}", queryId);
                    var peptideSection = ParseSection(sr, speName);

                    // Spectra without any valid candidate are skipped entirely.
                    int candidateCount = int.Parse(peptideSection["ValidCandidate"]);
                    if (candidateCount == 0)
                    {
                        continue;
                    }

                    var expMH  = MyConvert.ToDouble(peptideSection["MH"]);
                    var expMz  = MyConvert.ToDouble(peptideSection["MZ"]);
                    var charge = int.Parse(peptideSection["Charge"]);

                    var iPeps = new List <IIdentifiedSpectrum>();
                    result[queryId] = iPeps;

                    // lastHit is the most recently created spectrum hit; its DeltaScore is
                    // filled in once the score of the next, lower-scoring candidate is known.
                    IIdentifiedSpectrum lastHit = null;
                    int rank = 0;
                    for (int k = 1; k <= candidateCount; k++)
                    {
                        string key      = "NO" + k.ToString();
                        var    scoreKey = key + "_Score";
                        if (!peptideSection.ContainsKey(scoreKey))
                        {
                            // Fewer candidates listed than announced: nothing follows the last hit.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0;
                            }
                            break;
                        }

                        double score = MyConvert.ToDouble(peptideSection[scoreKey]);
                        if (score < minScore || score == 0.0)
                        {
                            // The rejected candidate's score still determines the last hit's delta score.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }
                            break;
                        }

                        // A candidate with exactly the previous hit's score shares its rank and is
                        // attached to the same spectrum hit as an additional peptide.
                        bool bSameRank = null != lastHit && score == lastHit.Score;
                        if (!bSameRank)
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }

                            rank++;
                            if (rank > minRank)
                            {
                                break;
                            }
                        }

                        IIdentifiedSpectrum mphit;
                        if (bSameRank)
                        {
                            mphit = lastHit;
                        }
                        else
                        {
                            mphit = new IdentifiedSpectrum();

                            mphit.Rank        = rank;
                            mphit.Score       = score;
                            mphit.ExpectValue = MyConvert.ToDouble(peptideSection[key + "_EValue"]);

                            // Theoretical MH comes from "_MH" when present, otherwise from "_Mass".
                            var mhkey = key + "_MH";
                            if (peptideSection.ContainsKey(mhkey))
                            {
                                mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[mhkey]);
                            }
                            else
                            {
                                mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[key + "_Mass"]);
                            }

                            var micKey = key + "_Matched_Peaks";
                            if (peptideSection.ContainsKey(micKey))
                            {
                                mphit.MatchedIonCount = int.Parse(peptideSection[micKey]);
                                mphit.MatchedTIC      = MyConvert.ToDouble(peptideSection[key + "_Matched_Intensity"]);
                            }

                            var misKey = key + "_MissCleave";
                            if (peptideSection.ContainsKey(misKey))
                            {
                                mphit.NumMissedCleavages = int.Parse(peptideSection[misKey]);
                            }
                            mphit.ExperimentalMH = expMH;
                            // Default delta score; overwritten above if another candidate follows.
                            mphit.DeltaScore     = 1.0;

                            mphit.Query.QueryId    = queryId;
                            mphit.Query.ObservedMz = expMz;
                            mphit.Query.Charge     = charge;
                            //mphit.Query.MatchCount = queryItem.MatchCount;

                            lastHit = mphit;
                        }

                        var mp = new IdentifiedPeptide(mphit);
                        mp.Sequence = peptideSection[key + "_SQ"];

                        string modificationPos  = peptideSection[key + "_Modify_Pos"];
                        string modificationName = peptideSection[key + "_Modify_Name"];

                        Dictionary <int, string> modifications = GetModifications(modificationPos, modificationName);

                        ModifySequence(mp, modifications, mm);
                        AssignModification(mphit, modifications, mm);

                        // The first comma-separated field is skipped (loop starts at index 1).
                        // NOTE(review): presumably it is a protein count rather than a name; confirm.
                        string proteins = peptideSection[key + "_Proteins"];
                        var    parts    = proteins.Split(',');
                        for (int i = 1; i < parts.Count(); i++)
                        {
                            mp.AddProtein(parts[i]);
                        }

                        // Same-rank peptides were attached to a hit that is already in the list.
                        if (!bSameRank)
                        {
                            iPeps.Add(mphit);
                        }
                    }

                    // Only the file-name portion of the "Input" value is used as the title.
                    var title = new FileInfo(peptideSection["Input"]).Name;

                    SequestFilename sf = this.TitleParser.GetValue(title);
                    sf.Charge = charge;

                    if (sf.Experimental == null || sf.Experimental.Length == 0)
                    {
                        sf.Experimental = sourceDir;
                    }

                    // Every hit of this query shares the same title and long file name.
                    foreach (IIdentifiedSpectrum mp in iPeps)
                    {
                        mp.Query.Title = title;
                        mp.Query.FileScan.LongFileName = sf.LongFileName;
                    }
                }
            }

            return(result);
        }
Пример #42
0
        /// <summary>
        /// Loads an XML search result and returns one identified spectrum for every
        /// "MSHitSet" element that contains an "MSHitSet_hits" child.
        /// </summary>
        /// <remarks>
        /// For each spectrum only the hits with the lowest e-value are kept; hits that tie on
        /// that e-value are all kept as peptides of the same spectrum. Masses are divided by the
        /// "MSResponse_scale" value. Modification markers from the search settings are inserted
        /// into the peptide string after the modified residue.
        /// All numbers are parsed with the invariant culture, because the file is
        /// machine-generated and must read the same regardless of the current thread culture.
        /// </remarks>
        /// <param name="fileName">Path of the XML file to read.</param>
        /// <returns>The identified spectra, in document order.</returns>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // Fully qualified so the block does not depend on a "using System.Globalization".
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            XElement root = XElement.Load(fileName);

            XElement request = root.FindElement("MSSearch_request");

            // Parse the identification protocol first.
            var modMap   = ParseSearchModificationMap(request.FindFirstDescendant("MSSearchSettings_variable"));
            var protease = ParseProtease(request.FindFirstDescendant("MSSearchSettings_enzyme"));

            // Without a protease there is nothing to count missed cleavages against.
            Func <string, int> missCalc;

            if (protease == null)
            {
                missCalc = m => 0;
            }
            else
            {
                missCalc = m => protease.GetMissCleavageSiteCount(m);
            }

            // Parse the sequence collection, including the protein<->peptide map.
            var result   = new List <IIdentifiedSpectrum>();
            var response = root.FindElement("MSSearch_response");
            var scale    = double.Parse(response.FindFirstDescendant("MSResponse_scale").Value, invariant);

            var idList = response.FindFirstDescendant("MSResponse_hitsets");

            foreach (var sir in idList.FindElements("MSHitSet"))
            {
                var hits = sir.FindElement("MSHitSet_hits");
                if (hits == null)
                {
                    continue;
                }

                var spectrum = new IdentifiedSpectrum();
                result.Add(spectrum);

                var title = sir.FindElement("MSHitSet_ids").FindElement("MSHitSet_ids_E").Value;
                spectrum.Query.FileScan = this.TitleParser.GetValue(title);

                foreach (var hit in hits.FindElements("MSHits"))
                {
                    var evalue = double.Parse(hit.FindElement("MSHits_evalue").Value, invariant);
                    if (spectrum.Peptides.Count > 0)
                    {
                        // Worse than what we already have: ignore this hit.
                        if (evalue > spectrum.ExpectValue)
                        {
                            continue;
                        }
                        // Strictly better: the peptides collected so far are superseded.
                        if (evalue < spectrum.ExpectValue)
                        {
                            spectrum.ClearPeptides();
                        }
                    }
                    spectrum.ExpectValue = evalue;
                    spectrum.Score       = -Math.Log(spectrum.ExpectValue);
                    if (spectrum.Query.Charge == 0) // trust the charge from title
                    {
                        spectrum.Query.Charge = int.Parse(hit.FindElement("MSHits_charge").Value, invariant);
                    }
                    spectrum.ExperimentalMass = double.Parse(hit.FindElement("MSHits_mass").Value, invariant) / scale;
                    spectrum.TheoreticalMass  = double.Parse(hit.FindElement("MSHits_theomass").Value, invariant) / scale;

                    var peptide = new IdentifiedPeptide(spectrum);
                    var seq     = hit.FindElement("MSHits_pepstring").Value;
                    spectrum.NumMissedCleavages = missCalc(seq);

                    var mods = hit.FindElement("MSHits_mods");
                    if (mods != null)
                    {
                        // Highest site first, so inserting a marker never shifts a site that is
                        // still waiting to be processed.
                        var modsloc = (from ele in mods.FindElements("MSModHit")
                                       let loc = int.Parse(ele.FindElement("MSModHit_site").Value, invariant)
                                       let modtype = ele.FindElement("MSModHit_modtype").FindElement("MSMod").Value
                                       orderby loc descending
                                       select new { Location = loc, ModType = modtype }).ToList();
                        foreach (var modloc in modsloc)
                        {
                            seq = seq.Insert(modloc.Location + 1, modMap[modloc.ModType]);
                        }
                    }

                    // Flank the (possibly marked-up) sequence with the pepstart/pepstop values.
                    peptide.Sequence = hit.FindElement("MSHits_pepstart").Value + "." + seq + "." + hit.FindElement("MSHits_pepstop").Value;

                    foreach (var pep in hit.FindElement("MSHits_pephits").FindElements("MSPepHit"))
                    {
                        // NOTE(review): StringBefore presumably keeps the text before the first
                        // space/tab, i.e. the accession part of the defline; confirm.
                        var proteinName = pep.FindElement("MSPepHit_defline").Value.StringBefore(" ").StringBefore("\t");
                        peptide.AddProtein(proteinName);
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// Reads a tab-delimited export and groups its peptide rows by protein accession.
        /// </summary>
        /// <remarks>
        /// The first line is the header; column positions are looked up by header text.
        /// Reading stops at the first blank line. Rows with an empty first cell, or with an
        /// empty delta-score cell (rank &gt; 1), are skipped. The first time an accession is
        /// seen, the next line is discarded and the line after it is read as the protein row
        /// (coverage, molecular weight, isoelectric point, score and description at cell
        /// indexes 2, 5, 6, 7 and 8).
        /// </remarks>
        /// <param name="fileName">Path of the tab-delimited file.</param>
        /// <returns>One protein per distinct accession, each carrying its peptides.</returns>
        public override List <IIdentifiedProtein> ParseProteins(string fileName)
        {
            var byAccession = new Dictionary <string, IIdentifiedProtein>();

            using (var reader = new StreamReader(fileName))
            {
                string[] header = reader.ReadLine().Split('\t');

                // Position of the column whose header text equals the given caption (-1 if absent).
                Func <string, int> columnOf = caption => Array.FindIndex(header, (m => m == caption));

                int sequenceCol  = columnOf("Sequence");
                int accessionCol = columnOf("Protein Accessions");
                int modsCol      = columnOf("Modifications");
                int xcorrCol     = columnOf("XCorr");
                int deltaCol     = Array.FindIndex(header, (m => m.EndsWith(" Score")));
                int chargeCol    = columnOf("Charge");
                int mzCol        = columnOf("m/z [Da]");
                int mhCol        = columnOf("MH+ [Da]");
                int firstScanCol = columnOf("First Scan");
                int lastScanCol  = columnOf("Last Scan");
                int ionsCol      = columnOf("Ions Matched");
                int fileCol      = columnOf("Spectrum File");

                Progress.SetRange(0, reader.BaseStream.Length);
                Progress.SetMessage("Parsing file ...");

                string row;
                // End of file or the first blank line terminates the table.
                while ((row = reader.ReadLine()) != null && row.Trim().Length != 0)
                {
                    string[] cells = row.Split('\t');
                    if (cells[0].Length == 0)
                    {
                        continue;
                    }

                    Progress.SetPosition(reader.BaseStream.Position);

                    string sequence = cells[sequenceCol];

                    string delta = cells[deltaCol];
                    if (delta.Length == 0) // rank > 1
                    {
                        continue;
                    }

                    string accession = cells[accessionCol];

                    IIdentifiedProtein owner;
                    if (!byAccession.TryGetValue(accession, out owner))
                    {
                        // Skip one line; the protein details are on the line after it.
                        reader.ReadLine();
                        string[] proteinCells = reader.ReadLine().Split('\t');

                        owner = new IdentifiedProtein(accession)
                        {
                            Coverage         = MyConvert.ToDouble(proteinCells[2]),
                            MolecularWeight  = MyConvert.ToDouble(proteinCells[5]) * 1000,
                            IsoelectricPoint = MyConvert.ToDouble(proteinCells[6]),
                            Score            = MyConvert.ToDouble(proteinCells[7]),
                            Description      = proteinCells[8]
                        };

                        byAccession[accession] = owner;
                    }

                    var spectrum = new IdentifiedSpectrum();
                    var peptide  = new IdentifiedPeptide(spectrum);
                    peptide.Sequence = sequence.ToUpper();
                    peptide.AddProtein(accession);

                    spectrum.Modifications = cells[modsCol];
                    spectrum.DeltaScore    = MyConvert.ToDouble(delta);
                    spectrum.Charge        = Convert.ToInt32(cells[chargeCol]);
                    spectrum.ObservedMz    = MyConvert.ToDouble(cells[mzCol]);
                    spectrum.TheoreticalMH = MyConvert.ToDouble(cells[mhCol]);
                    spectrum.Ions          = cells[ionsCol];

                    spectrum.Query.FileScan.FirstScan    = Convert.ToInt32(cells[firstScanCol]);
                    spectrum.Query.FileScan.LastScan     = Convert.ToInt32(cells[lastScanCol]);
                    spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(cells[fileCol]);

                    owner.Peptides.Add(peptide);
                }
            }

            return(byAccession.Values.ToList());
        }