Exemple #1
0
    /// <summary>
    /// Builds a small fixture: two peptides (SEQ1 from EXP1, SEQ2 from EXP2) and
    /// three proteins referencing them. Protein1 holds both peptides, Protein2
    /// holds only the first and Protein3 only the second.
    /// </summary>
    /// <returns>The proteins in the order Protein3, Protein2, Protein1.</returns>
    private static List<IIdentifiedProtein> InitProteins()
    {
      // First peptide: assigned to Protein1 and Protein2.
      var spectrumA = new IdentifiedSpectrum();
      spectrumA.Query.FileScan.Experimental = "EXP1";
      var peptideA = new IdentifiedPeptide(spectrumA);
      peptideA.AddProtein("Protein1");
      peptideA.AddProtein("Protein2");
      peptideA.Sequence = "SEQ1";

      // Second peptide: assigned to Protein1 and Protein3.
      var spectrumB = new IdentifiedSpectrum();
      spectrumB.Query.FileScan.Experimental = "EXP2";
      var peptideB = new IdentifiedPeptide(spectrumB);
      peptideB.AddProtein("Protein1");
      peptideB.AddProtein("Protein3");
      peptideB.Sequence = "SEQ2";

      var protein1 = new IdentifiedProtein("Protein1");
      protein1.Peptides.Add(peptideA);
      protein1.Peptides.Add(peptideB);

      var protein2 = new IdentifiedProtein("Protein2");
      protein2.Peptides.Add(peptideA);

      var protein3 = new IdentifiedProtein("Protein3");
      protein3.Peptides.Add(peptideB);

      // Returned in reverse name order on purpose (3, 2, 1), as in the original fixture.
      return new List<IIdentifiedProtein> { protein3, protein2, protein1 };
    }
Exemple #2
0
        /// <summary>
        /// Creates a new peptide for <paramref name="sph"/> from one search hit element:
        /// resolves the (possibly modified) sequence, adds flanking residues when both
        /// are available, fills cleavage/termini counts, adds the primary and alternative
        /// proteins, and finally delegates score parsing to ParseScoreAndOtherInformation.
        /// </summary>
        /// <param name="sph">Spectrum that receives the peptide and the parsed values.</param>
        /// <param name="searchHit">Element carrying the "peptide", "protein" and "num_tot_proteins" attributes.</param>
        /// <param name="ppmods">Modification definitions handed to FindModificationChar.</param>
        private void ParseSearchHit(IIdentifiedSpectrum sph, XElement searchHit, PepXmlModifications ppmods)
        {
            var sp = new IdentifiedPeptide(sph);

            var mod_info = searchHit.FindFirstDescendant("modification_info");

            string seq = searchHit.Attribute("peptide").Value;

            if (mod_info != null)
            {
                var modified_peptide = mod_info.Attribute("modified_peptide");
                if (modified_peptide != null && !modReg.Match(modified_peptide.Value).Success)
                {
                    // The modified_peptide text does not match modReg, so it is used verbatim.
                    seq = modified_peptide.Value;
                }
                else
                {
                    var pureSeq = seq;
                    var modaas  = PeptideProphetUtils.ParseModificationAminoacidMass(mod_info);
                    if (modaas != null && modaas.Count > 0)
                    {
                        // Insert in reversed order so earlier insertions do not shift the
                        // positions of later ones (assumes modaas is ordered by ascending
                        // position — TODO confirm against ParseModificationAminoacidMass).
                        modaas.Reverse();
                        foreach (var modaa in modaas)
                        {
                            string modchar = FindModificationChar(ppmods, modaa, pureSeq);
                            seq = seq.Insert(modaa.Position, modchar);
                        }
                    }
                }
            }

            // Fix: the original only checked peptide_prev_aa and then dereferenced
            // peptide_next_aa unconditionally, throwing NullReferenceException when the
            // latter was absent. The flanked form is now produced only when both exist.
            var prevAa = searchHit.Attribute("peptide_prev_aa");
            var nextAa = searchHit.Attribute("peptide_next_aa");
            if (prevAa != null && nextAa != null)
            {
                sp.Sequence = prevAa.Value + "." + seq + "." + nextAa.Value;
            }
            else
            {
                sp.Sequence = seq;
            }

            sph.NumMissedCleavages = GetAttributeValue(searchHit, "num_missed_cleavages", 0);
            sph.NumProteaseTermini = GetAttributeValue(searchHit, "num_tol_term", 2);

            sp.AddProtein(searchHit.Attribute("protein").Value);

            var NumTotalProteins = int.Parse(searchHit.Attribute("num_tot_proteins").Value);

            // Alternative proteins are only looked up when more than one protein is reported.
            if (NumTotalProteins > 1)
            {
                var alternative_proteins = searchHit.FindDescendants("alternative_protein");
                foreach (var alternative_protein in alternative_proteins)
                {
                    sp.AddProtein(alternative_protein.Attribute("protein").Value);
                }
            }

            ParseScoreAndOtherInformation(sph, searchHit);
        }
        /// <summary>
        /// Checks the "Reference" converter: peptides are joined with " ! " and the
        /// proteins of one peptide with "/", both when reading and when writing.
        /// </summary>
        public void Test()
        {
            IPropertyConverter <IdentifiedSpectrum> converter = new IdentifiedSpectrumReferenceConverter <IdentifiedSpectrum>();
            var spectrum = new IdentifiedSpectrum();

            // First peptide: a single protein.
            var singleProteinPeptide = new IdentifiedPeptide(spectrum);
            singleProteinPeptide.AddProtein("11111");

            // Second peptide: two proteins.
            var twoProteinPeptide = new IdentifiedPeptide(spectrum);
            twoProteinPeptide.AddProtein("22222");
            twoProteinPeptide.AddProtein("33333");

            Assert.AreEqual("Reference", converter.Name);
            Assert.AreEqual("11111 ! 22222/33333", converter.GetProperty(spectrum));

            // Writing back a value with the same number of peptides but different proteins.
            converter.SetProperty(spectrum, "44444/55555 ! 66666");

            var firstProteins = spectrum.Peptides[0].Proteins;
            Assert.AreEqual(2, firstProteins.Count);
            Assert.AreEqual("44444", firstProteins[0]);
            Assert.AreEqual("55555", firstProteins[1]);

            var secondProteins = spectrum.Peptides[1].Proteins;
            Assert.AreEqual(1, secondProteins.Count);
            Assert.AreEqual("66666", secondProteins[0]);
        }
    /// <summary>
    /// Expects GetProteinString to join peptides with " ! " and the proteins of
    /// a single peptide with "/".
    /// </summary>
    public void TestGetProteinString()
    {
      var spectrum = new IdentifiedSpectrum();

      // One peptide with a single protein ...
      var firstPeptide = new IdentifiedPeptide(spectrum);
      firstPeptide.AddProtein("P1");

      // ... and one peptide with two proteins.
      var secondPeptide = new IdentifiedPeptide(spectrum);
      secondPeptide.AddProtein("P2");
      secondPeptide.AddProtein("P3");

      string actual = MascotPeptideHitTextWriter.GetProteinString(spectrum);
      Assert.AreEqual("P1 ! P2/P3", actual);
    }
Exemple #5
0
        /// <summary>
        /// Feeds two spectra (one peptide each, sharing Protein1) to
        /// MascotUtils.BuildProteins and checks the peptides attached to each of the
        /// three resulting proteins.
        /// </summary>
        public void TestBuildProteins()
        {
            // EXP1 peptide -> Protein1, Protein2
            var mph1 = new IdentifiedSpectrum();
            mph1.Query.FileScan.Experimental = "EXP1";
            var mp1 = new IdentifiedPeptide(mph1);
            mp1.AddProtein("Protein1");
            mp1.AddProtein("Protein2");

            // EXP2 peptide -> Protein1, Protein3
            var mph2 = new IdentifiedSpectrum();
            mph2.Query.FileScan.Experimental = "EXP2";
            var mp2 = new IdentifiedPeptide(mph2);
            mp2.AddProtein("Protein1");
            mp2.AddProtein("Protein3");

            var spectra = new List <IIdentifiedSpectrum> { mph1, mph2 };

            List <IIdentifiedProtein> built = MascotUtils.BuildProteins(spectra);

            Assert.AreEqual(3, built.Count);

            foreach (IdentifiedProtein protein in built)
            {
                if (protein.Name.Equals("Protein1"))
                {
                    // Shared protein: both peptides.
                    Assert.AreEqual(2, protein.Peptides.Count);
                }
                else if (protein.Name.Equals("Protein2"))
                {
                    Assert.AreEqual(1, protein.Peptides.Count);
                    Assert.AreEqual(mp1, protein.Peptides[0]);
                }
                else if (protein.Name.Equals("Protein3"))
                {
                    Assert.AreEqual(1, protein.Peptides.Count);
                    Assert.AreEqual(mp2, protein.Peptides[0]);
                }
            }
        }
        /// <summary>
        /// Expects GetProteinString to join peptides with " ! " and the proteins of
        /// a single peptide with "/".
        /// </summary>
        public void TestGetProteinString()
        {
            var spectrum = new IdentifiedSpectrum();

            // One peptide with a single protein ...
            var firstPeptide = new IdentifiedPeptide(spectrum);
            firstPeptide.AddProtein("P1");

            // ... and one peptide with two proteins.
            var secondPeptide = new IdentifiedPeptide(spectrum);
            secondPeptide.AddProtein("P2");
            secondPeptide.AddProtein("P3");

            string actual = MascotPeptideHitTextWriter.GetProteinString(spectrum);
            Assert.AreEqual("P1 ! P2/P3", actual);
        }
        /// <summary>
        /// Replaces the protein names of the peptides of <paramref name="t"/> with the
        /// ones encoded in <paramref name="value"/>. The value is split into one group
        /// per peptide with <c>reg</c>, and each group into protein names with <c>chars</c>.
        /// </summary>
        /// <param name="t">Object whose peptides are updated.</param>
        /// <param name="value">Encoded protein groups.</param>
        public override void SetProperty(T t, string value)
        {
            string[] groups = reg.Split(value);

            if (t.Peptides.Count == groups.Length)
            {
                // Same number of peptides: keep them and only swap their proteins.
                for (int index = 0; index < groups.Length; index++)
                {
                    string[] names = groups[index].Split(chars);
                    var existing = t.Peptides[index];
                    existing.ClearProteins();
                    foreach (string name in names)
                    {
                        existing.AddProtein(name);
                    }
                }

                return;
            }

            // Peptide count differs: drop all peptides and create one per group.
            t.ClearPeptides();

            foreach (string group in groups)
            {
                IIdentifiedPeptide created = new IdentifiedPeptide(t);

                foreach (string name in group.Split(chars))
                {
                    created.AddProtein(name);
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Loads the XML file and creates one identified spectrum (with a single
        /// peptide and protein) for every "psm" element found under "psms".
        /// </summary>
        /// <param name="fileName">Path of the XML file. Its file name, cut with StringBefore("."), is stored as the experiment name.</param>
        /// <returns>The spectra in document order.</returns>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // Numeric values in the file are machine-written, so they are parsed with the
            // invariant culture. The original used the current culture, which misreads
            // "0.5" on systems whose decimal separator is a comma.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result = new List <IIdentifiedSpectrum>();
            XElement root   = XElement.Load(fileName);
            var      psms   = root.FindElement("psms").FindElements("psm");

            foreach (var psm in psms)
            {
                IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                spec.Id            = psm.FindAttribute("psm_id").Value.StringAfter("decoy_");
                spec.FromDecoy     = psm.FindAttribute("decoy").Value.Equals("true");
                spec.SpScore       = double.Parse(psm.FindElement("svm_score").Value, invariant);
                spec.QValue        = double.Parse(psm.FindElement("q_value").Value, invariant);
                // The "pep" element is stored as Score and "p_value" as Probability.
                spec.Score         = double.Parse(psm.FindElement("pep").Value, invariant);
                spec.Probability   = double.Parse(psm.FindElement("p_value").Value, invariant);
                spec.TheoreticalMH = double.Parse(psm.FindElement("calc_mass").Value, invariant);
                spec.Query.FileScan.Experimental = Path.GetFileName(fileName).StringBefore(".");
                var pep    = new IdentifiedPeptide(spec);
                var pepseq = psm.FindElement("peptide_seq");
                pep.Sequence = pepseq.FindAttribute("seq").Value;
                pep.AddProtein(psm.FindElement("protein_id").Value);
                result.Add(spec);
            }

            return(result);
        }
Exemple #9
0
    /// <summary>
    /// Feeds two spectra (one peptide each, sharing Protein1) to
    /// MascotUtils.BuildProteins and checks the peptides attached to each of the
    /// three resulting proteins.
    /// </summary>
    public void TestBuildProteins()
    {
      // EXP1 peptide -> Protein1, Protein2
      var mph1 = new IdentifiedSpectrum();
      mph1.Query.FileScan.Experimental = "EXP1";
      var mp1 = new IdentifiedPeptide(mph1);
      mp1.AddProtein("Protein1");
      mp1.AddProtein("Protein2");

      // EXP2 peptide -> Protein1, Protein3
      var mph2 = new IdentifiedSpectrum();
      mph2.Query.FileScan.Experimental = "EXP2";
      var mp2 = new IdentifiedPeptide(mph2);
      mp2.AddProtein("Protein1");
      mp2.AddProtein("Protein3");

      var spectra = new List<IIdentifiedSpectrum> { mph1, mph2 };

      List<IIdentifiedProtein> built = MascotUtils.BuildProteins(spectra);

      Assert.AreEqual(3, built.Count);

      foreach (IdentifiedProtein protein in built)
      {
        if (protein.Name.Equals("Protein1"))
        {
          // Shared protein: both peptides.
          Assert.AreEqual(2, protein.Peptides.Count);
        }
        else if (protein.Name.Equals("Protein2"))
        {
          Assert.AreEqual(1, protein.Peptides.Count);
          Assert.AreEqual(mp1, protein.Peptides[0]);
        }
        else if (protein.Name.Equals("Protein3"))
        {
          Assert.AreEqual(1, protein.Peptides.Count);
          Assert.AreEqual(mp2, protein.Peptides[0]);
        }
      }
    }
    /// <summary>
    /// Expects a tab inside a protein name to come back as a single space, both
    /// for AddProtein and for SetProtein.
    /// </summary>
    public void TestAddProtein()
    {
      var spectrum = new IdentifiedSpectrum();
      var peptide = new IdentifiedPeptide(spectrum);

      // Adding a name containing a tab.
      peptide.AddProtein("AAAAA\tBBBBB");
      Assert.AreEqual(1, peptide.Proteins.Count);
      Assert.AreEqual("AAAAA BBBBB", peptide.Proteins[0]);

      // Overwriting slot 0 with another name containing a tab.
      peptide.SetProtein(0, "CCCCC\tDDDDD");
      Assert.AreEqual(1, peptide.Proteins.Count);
      Assert.AreEqual("CCCCC DDDDD", peptide.Proteins[0]);
    }
Exemple #11
0
        /// <summary>
        /// Builds a small fixture: two peptides (SEQ1 from EXP1, SEQ2 from EXP2) and
        /// three proteins referencing them. Protein1 holds both peptides, Protein2
        /// holds only the first and Protein3 only the second.
        /// </summary>
        /// <returns>The proteins in the order Protein3, Protein2, Protein1.</returns>
        private static List <IIdentifiedProtein> InitProteins()
        {
            // First peptide: assigned to Protein1 and Protein2.
            var spectrumA = new IdentifiedSpectrum();
            spectrumA.Query.FileScan.Experimental = "EXP1";
            var peptideA = new IdentifiedPeptide(spectrumA);
            peptideA.AddProtein("Protein1");
            peptideA.AddProtein("Protein2");
            peptideA.Sequence = "SEQ1";

            // Second peptide: assigned to Protein1 and Protein3.
            var spectrumB = new IdentifiedSpectrum();
            spectrumB.Query.FileScan.Experimental = "EXP2";
            var peptideB = new IdentifiedPeptide(spectrumB);
            peptideB.AddProtein("Protein1");
            peptideB.AddProtein("Protein3");
            peptideB.Sequence = "SEQ2";

            var protein1 = new IdentifiedProtein("Protein1");
            protein1.Peptides.Add(peptideA);
            protein1.Peptides.Add(peptideB);

            var protein2 = new IdentifiedProtein("Protein2");
            protein2.Peptides.Add(peptideA);

            var protein3 = new IdentifiedProtein("Protein3");
            protein3.Peptides.Add(peptideB);

            // Returned in reverse name order on purpose (3, 2, 1), as in the original fixture.
            return new List <IIdentifiedProtein> { protein3, protein2, protein1 };
        }
        /// <summary>
        /// Expects a tab inside a protein name to come back as a single space, both
        /// for AddProtein and for SetProtein.
        /// </summary>
        public void TestAddProtein()
        {
            var spectrum = new IdentifiedSpectrum();
            var peptide = new IdentifiedPeptide(spectrum);

            // Adding a name containing a tab.
            peptide.AddProtein("AAAAA\tBBBBB");
            Assert.AreEqual(1, peptide.Proteins.Count);
            Assert.AreEqual("AAAAA BBBBB", peptide.Proteins[0]);

            // Overwriting slot 0 with another name containing a tab.
            peptide.SetProtein(0, "CCCCC\tDDDDD");
            Assert.AreEqual(1, peptide.Proteins.Count);
            Assert.AreEqual("CCCCC DDDDD", peptide.Proteins[0]);
        }
    /// <summary>
    /// Round-trip test for the composite converter built from a tab-separated header:
    /// a spectrum with two peptides is rendered to a line, then a different line is
    /// written back through SetProperty and expected to be reproduced by GetProperty.
    /// </summary>
    public void TestNoredundant()
    {
      // Header line; the first column name is empty (the string starts with a tab).
      string header = "\t\"File, Scan(s)\"\tSequence\tMH+\tDiff(MH+)\tCharge\tRank\tScore\tDeltaScore\tExpectValue\tQuery\tIons\tReference\tDIFF_MODIFIED_CANDIDATE\tPI\tMissCleavage\tModification";
      IPropertyConverter<IIdentifiedSpectrum> converter = IdentifiedSpectrumPropertyConverterFactory.GetInstance().GetConverters(header, '\t');

      // The composite converter is expected to report the header itself as its name.
      Assert.AreEqual(header, converter.Name);

      IIdentifiedSpectrum mphit = new IdentifiedSpectrum();
      mphit.Query.FileScan.ShortFileName = "AAA,1-2";

      // First peptide with two proteins.
      IdentifiedPeptide mp1 = new IdentifiedPeptide(mphit);
      mp1.Sequence = "AAAAA";
      mp1.AddProtein("PROTEIN1");
      mp1.AddProtein("PROTEIN2");

      // Second peptide with one protein.
      IdentifiedPeptide mp2 = new IdentifiedPeptide(mphit);
      mp2.Sequence = "BBBBB";
      mp2.AddProtein("PROTEIN3");

      mphit.TheoreticalMH = 1000.00102;
      mphit.ExperimentalMH = 1000.0;
      mphit.Query.Charge = 2;
      mphit.Rank = 1;
      mphit.Score = 100.2;
      mphit.DeltaScore = 0.5;
      mphit.ExpectValue = 1.1e-2;
      mphit.Query.QueryId = 10;
      mphit.NumMissedCleavages = 1;
      mphit.Modifications = "O18(1)";

      // Expected rendering; the columns are separated by literal tab characters.
      string expect = "	AAA,1 - 2	AAAAA ! BBBBB	1000.00102	0.00102	2	1	100.2	0.5	1.10E-002	10	0|0	PROTEIN1/PROTEIN2 ! PROTEIN3		0.00	1	O18(1)";
      Assert.AreEqual(expect, converter.GetProperty(mphit));

      // Write a line with a single peptide back and expect it to be reproduced unchanged.
      string expectNew = "	BBB,2 - 3	BBBBB	1002.00783	-0.00200	3	2	200.2	0.6	1.20E-003	20	0|0	PROTEIN2/PROTEIN4		0.00	2	O18(2)";
      converter.SetProperty(mphit, expectNew);
      Assert.AreEqual(expectNew, converter.GetProperty(mphit));
    }
    /// <summary>
    /// Checks the "Reference" converter: peptides are joined with " ! " and the
    /// proteins of one peptide with "/", both when reading and when writing.
    /// </summary>
    public void Test()
    {
      IPropertyConverter<IdentifiedSpectrum> converter = new IdentifiedSpectrumReferenceConverter<IdentifiedSpectrum>();
      var spectrum = new IdentifiedSpectrum();

      // First peptide: a single protein.
      var singleProteinPeptide = new IdentifiedPeptide(spectrum);
      singleProteinPeptide.AddProtein("11111");

      // Second peptide: two proteins.
      var twoProteinPeptide = new IdentifiedPeptide(spectrum);
      twoProteinPeptide.AddProtein("22222");
      twoProteinPeptide.AddProtein("33333");

      Assert.AreEqual("Reference", converter.Name);
      Assert.AreEqual("11111 ! 22222/33333", converter.GetProperty(spectrum));

      // Writing back a value with the same number of peptides but different proteins.
      converter.SetProperty(spectrum, "44444/55555 ! 66666");

      var firstProteins = spectrum.Peptides[0].Proteins;
      Assert.AreEqual(2, firstProteins.Count);
      Assert.AreEqual("44444", firstProteins[0]);
      Assert.AreEqual("55555", firstProteins[1]);

      var secondProteins = spectrum.Peptides[1].Proteins;
      Assert.AreEqual(1, secondProteins.Count);
      Assert.AreEqual("66666", secondProteins[0]);
    }
Exemple #15
0
        //  Example of a line handled by this method:
        //  1.   1 /  1          0 1964.9940  0.0000  5.6970  2133.9  21/30  sw|P02666|CASBBOVIN   +1  K.FQSEEQQQTEDELQDK.I
        /// <summary>
        /// Parses one result line into <paramref name="entry"/>. Slashes are replaced by
        /// spaces, the line is split with <c>reg</c>, and the fields are read at the
        /// positions given by <c>itemIndex</c>. All existing peptides of the entry are
        /// cleared and a single new peptide with the parsed sequence and protein is created.
        /// </summary>
        /// <param name="line">Raw text line.</param>
        /// <param name="entry">Spectrum that receives the parsed values.</param>
        /// <returns>false when the line has fewer than itemIndex.MinCount fields (entry untouched); otherwise true.</returns>
        protected bool ParseFromOutfileLine(string line, IdentifiedSpectrum entry)
        {
            // "1 / 1" and "21/30" become separate fields once '/' is turned into a space.
            string[] fields = this.reg.Split(line.Trim().Replace('/', ' '));

            if (itemIndex.MinCount > fields.Length)
            {
                return false;
            }

            entry.Rank = int.Parse(fields[itemIndex.RankIndex]);
            entry.SpRank = int.Parse(fields[itemIndex.SpRankIndex]);
            entry.TheoreticalMH = MyConvert.ToDouble(fields[itemIndex.TheoreticalMHIndex]);
            entry.DeltaScore = MyConvert.ToDouble(fields[itemIndex.DeltaScoreIndex]);
            entry.Score = MyConvert.ToDouble(fields[itemIndex.ScoreIndex]);
            entry.SpScore = MyConvert.ToDouble(fields[itemIndex.SpScoreIndex]);
            entry.MatchedIonCount = int.Parse(fields[itemIndex.MatchedIonCountIndex]);
            entry.TheoreticalIonCount = int.Parse(fields[itemIndex.TheoreticalIonCountIndex]);

            entry.ClearPeptides();

            // The field at SequenceIndex is either the sequence itself or a "+N"
            // duplicate marker, in which case the sequence is the following field.
            string marker = fields[itemIndex.SequenceIndex];
            string sequence;

            if (marker[0] == '+')
            {
                entry.DuplicatedCount = int.Parse(marker.Substring(1));
                sequence = fields[itemIndex.SequenceIndex + 1];
            }
            else
            {
                entry.DuplicatedCount = 0;
                sequence = marker;
            }

            CheckSequenceValid(ref sequence);

            var peptide = new IdentifiedPeptide(entry);
            peptide.Sequence = sequence;
            peptide.AddProtein(fields[itemIndex.ProteinIndex]);

            return true;
        }
Exemple #16
0
        /// <summary>
        /// Fills <paramref name="entry"/> from an already tokenized result line that
        /// carries an id column. Fields 1-2 are the ranks, 4-9 the masses, scores and ion
        /// counts, 10 the protein and 11 either the sequence or a "+N" duplicate marker
        /// (the sequence then follows in field 12). All existing peptides of the entry are
        /// cleared and one new peptide is created.
        /// </summary>
        /// <param name="sLines">Tokens of the line.</param>
        /// <param name="entry">Spectrum that receives the parsed values.</param>
        /// <returns>false, with <paramref name="entry"/> untouched, when there are too few tokens; otherwise true.</returns>
        protected bool ParseFromOutfileLineWithId(List <string> sLines, IdentifiedSpectrum entry)
        {
            if (sLines.Count < 12)
            {
                return(false);
            }

            // Fix: the original guard allowed exactly 12 tokens but then read sLines[12]
            // when token 11 was a "+N" marker, and indexed [0] on a possibly empty token.
            // Both cases are now rejected up front, before the entry is modified.
            string marker = sLines[11];
            if (marker.Length == 0)
            {
                return(false);
            }

            bool hasDuplicateMarker = marker[0] == '+';
            if (hasDuplicateMarker && sLines.Count < 13)
            {
                return(false);
            }

            //entry.Index = int.Parse(sLines[0].Substring(0, sLines[0].Length - 1));
            entry.Rank   = int.Parse(sLines[1]);
            entry.SpRank = int.Parse(sLines[2]);
            //entry.Id = int.Parse(sLines[3]);
            entry.TheoreticalMH       = MyConvert.ToDouble(sLines[4]);
            entry.DeltaScore          = MyConvert.ToDouble(sLines[5]);
            entry.Score               = MyConvert.ToDouble(sLines[6]);
            entry.SpScore             = MyConvert.ToDouble(sLines[7]);
            entry.MatchedIonCount     = int.Parse(sLines[8]);
            entry.TheoreticalIonCount = int.Parse(sLines[9]);

            entry.ClearPeptides();
            string sequence;

            if (!hasDuplicateMarker)
            {
                entry.DuplicatedCount = 0;
                sequence = marker;
            }
            else
            {
                // Marker looks like "+3": the number after '+' is the duplicate count.
                entry.DuplicatedCount = int.Parse(marker.Substring(1));
                sequence = sLines[12];
            }

            CheckSequenceValid(ref sequence);

            var sp = new IdentifiedPeptide(entry);

            sp.Sequence = sequence;
            sp.AddProtein(sLines[10]);

            return(true);
        }
        /// <summary>
        /// Loads the XML file and creates one identified spectrum for every
        /// "peptideSpectrumMatch" inside each "fragSpectrumScan" element. The first
        /// feature value is used as the score; the missed-cleavage count is taken from
        /// the feature at the index resolved for "# Missed Cleavages".
        /// </summary>
        /// <param name="fileName">Path of the XML file.</param>
        /// <returns>The spectra in document order.</returns>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // Numbers in the file are machine-written; parse them with the invariant
            // culture so the result does not depend on the user's regional settings
            // (the original used the current culture).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result       = new List <IIdentifiedSpectrum>();
            XElement root         = XElement.Load(fileName);
            var      features     = root.FindElement("featureDescriptions");
            var      descriptions = features.FindElements("featureDescription");

            var missIndex = FindIndex(fileName, descriptions, "# Missed Cleavages");

            var scans = root.FindElements("fragSpectrumScan");

            foreach (var scan in scans)
            {
                var scanNumber = int.Parse(scan.FindAttribute("scanNumber").Value, invariant);
                var psms       = scan.FindElements("peptideSpectrumMatch");
                foreach (var psm in psms)
                {
                    IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                    spec.Query.QueryId  = scanNumber;
                    spec.Id             = psm.FindAttribute("id").Value.StringAfter("decoy_");
                    spec.FromDecoy      = psm.FindAttribute("isDecoy").Value.Equals("true");
                    // NOTE(review): the mass-to-charge attributes are stored in the MH
                    // properties without any conversion — confirm this is intended.
                    spec.TheoreticalMH  = double.Parse(psm.FindAttribute("calculatedMassToCharge").Value, invariant);
                    spec.ExperimentalMH = double.Parse(psm.FindAttribute("experimentalMassToCharge").Value, invariant);
                    spec.Query.Charge   = int.Parse(psm.FindAttribute("chargeState").Value, invariant);
                    var pep = new IdentifiedPeptide(spec);
                    pep.Sequence = psm.FindElement("peptide").FindElement("peptideSequence").Value;
                    pep.AddProtein(psm.FindElement("occurence").FindAttribute("proteinId").Value);

                    var featureEles = psm.FindElement("features").FindElements("feature");
                    //The first one is the score.
                    spec.Score = double.Parse(featureEles[0].Value, invariant);
                    spec.NumMissedCleavages = int.Parse(featureEles[missIndex].Value, invariant);
                    result.Add(spec);
                }
            }

            return(result);
        }
        /// <summary>
        /// Loads the XML file and creates one identified spectrum (with a single
        /// peptide and protein) for every "peptide" element found under "peptides".
        /// </summary>
        /// <param name="fileName">Path of the XML file.</param>
        /// <returns>The spectra in document order.</returns>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            // Numbers in the file are machine-written; parse them with the invariant
            // culture so "0.5" is not misread under comma-decimal regional settings
            // (the original used the current culture).
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var      result   = new List <IIdentifiedSpectrum>();
            XElement root     = XElement.Load(fileName);
            var      peptides = root.FindElement("peptides").FindElements("peptide");

            foreach (var peptide in peptides)
            {
                IIdentifiedSpectrum spec = new IdentifiedSpectrum();
                var pep = new IdentifiedPeptide(spec);

                // The peptide_id attribute is used as the peptide sequence.
                pep.Sequence         = peptide.FindAttribute("peptide_id").Value;
                spec.FromDecoy       = peptide.FindAttribute("decoy").Value.Equals("true");
                spec.SpScore         = double.Parse(peptide.FindElement("svm_score").Value, invariant);
                spec.QValue          = double.Parse(peptide.FindElement("q_value").Value, invariant);
                // The "pep" element is stored as Score and "p_value" as Probability.
                spec.Score           = double.Parse(peptide.FindElement("pep").Value, invariant);
                spec.TheoreticalMass = double.Parse(peptide.FindElement("calc_mass").Value, invariant);
                pep.AddProtein(peptide.FindElement("protein_id").Value);
                spec.Probability = double.Parse(peptide.FindElement("p_value").Value, invariant);
                result.Add(spec);
            }

            return(result);
        }
        /// <summary>
        /// Reads a tab-separated export whose first line is a header and builds one
        /// protein per distinct "Protein Accessions" value, attaching a peptide (with
        /// its spectrum) for every data row that has a non-empty delta-score column.
        /// The first time an accession is seen, the next line is skipped and the line
        /// after it is read as that protein's detail row.
        /// </summary>
        /// <param name="fileName">Path of the tab-separated file.</param>
        /// <returns>The proteins collected from the file.</returns>
        /// <exception cref="InvalidDataException">The file is empty, or it ends before the detail row of a protein.</exception>
        public override List <IIdentifiedProtein> ParseProteins(string fileName)
        {
            Dictionary <string, IIdentifiedProtein> proteinMap = new Dictionary <string, IIdentifiedProtein>();

            using (StreamReader sr = new StreamReader(fileName))
            {
                string line = sr.ReadLine();
                if (line == null)
                {
                    // Fix: an empty file used to fail with a NullReferenceException on Split.
                    throw new InvalidDataException(string.Format("File {0} is empty; a tab-separated header line is expected.", fileName));
                }

                string[] headerParts = line.Split('\t');

                // Column positions are resolved by header name.
                int seqIndex    = Array.FindIndex(headerParts, (m => m == "Sequence"));
                int proIndex    = Array.FindIndex(headerParts, (m => m == "Protein Accessions"));
                int modIndex    = Array.FindIndex(headerParts, (m => m == "Modifications"));
                int xcIndex     = Array.FindIndex(headerParts, (m => m == "XCorr"));
                // First column whose name ends with " Score"; its value is stored as DeltaScore.
                int deltaIndex  = Array.FindIndex(headerParts, (m => m.EndsWith(" Score")));
                int chargeIndex = Array.FindIndex(headerParts, (m => m == "Charge"));
                int obsIndex    = Array.FindIndex(headerParts, (m => m == "m/z [Da]"));
                int mhIndex     = Array.FindIndex(headerParts, (m => m == "MH+ [Da]"));
                int fscanIndex  = Array.FindIndex(headerParts, (m => m == "First Scan"));
                int lscanIndex  = Array.FindIndex(headerParts, (m => m == "Last Scan"));
                int ionIndex    = Array.FindIndex(headerParts, (m => m == "Ions Matched"));
                int fileIndex   = Array.FindIndex(headerParts, (m => m == "Spectrum File"));

                Progress.SetRange(0, sr.BaseStream.Length);
                Progress.SetMessage("Parsing file ...");
                while ((line = sr.ReadLine()) != null)
                {
                    // A blank line ends parsing.
                    if (line.Trim().Length == 0)
                    {
                        break;
                    }
                    string[] parts = line.Split('\t');
                    // Rows with an empty first column are skipped.
                    if (parts[0].Length == 0)
                    {
                        continue;
                    }

                    Progress.SetPosition(sr.BaseStream.Position);

                    string seq = parts[seqIndex];

                    string deltaCn = parts[deltaIndex];
                    if (deltaCn.Length == 0)//rank > 1
                    {
                        continue;
                    }

                    string protein = parts[proIndex];
                    IIdentifiedProtein pro;
                    // Single lookup instead of ContainsKey followed by the indexer.
                    if (!proteinMap.TryGetValue(protein, out pro))
                    {
                        // Skip one line, then read the protein detail row.
                        // (presumably the skipped line is a protein header row — TODO confirm)
                        sr.ReadLine();
                        string proLine = sr.ReadLine();
                        if (proLine == null)
                        {
                            // Fix: a truncated file used to fail with a NullReferenceException here.
                            throw new InvalidDataException(string.Format("File {0} ends before the detail line of protein {1}.", fileName, protein));
                        }

                        string[] proParts = proLine.Split('\t');

                        var p = new IdentifiedProtein(protein);

                        p.Coverage         = MyConvert.ToDouble(proParts[2]);
                        // Column 5 is multiplied by 1000 (presumably kDa to Da — TODO confirm).
                        p.MolecularWeight  = MyConvert.ToDouble(proParts[5]) * 1000;
                        p.IsoelectricPoint = MyConvert.ToDouble(proParts[6]);
                        p.Score            = MyConvert.ToDouble(proParts[7]);
                        p.Description      = proParts[8];

                        proteinMap[protein] = p;
                        pro = p;
                    }

                    IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
                    IdentifiedPeptide  peptide  = new IdentifiedPeptide(spectrum);
                    peptide.Sequence = seq.ToUpper();
                    peptide.AddProtein(protein);
                    spectrum.Modifications               = parts[modIndex];
                    spectrum.DeltaScore                  = MyConvert.ToDouble(deltaCn);
                    spectrum.Charge                      = Convert.ToInt32(parts[chargeIndex]);
                    spectrum.ObservedMz                  = MyConvert.ToDouble(parts[obsIndex]);
                    spectrum.TheoreticalMH               = MyConvert.ToDouble(parts[mhIndex]);
                    spectrum.Ions                        = parts[ionIndex];
                    spectrum.Query.FileScan.FirstScan    = Convert.ToInt32(parts[fscanIndex]);
                    spectrum.Query.FileScan.LastScan     = Convert.ToInt32(parts[lscanIndex]);
                    spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(parts[fileIndex]);

                    pro.Peptides.Add(peptide);
                }
            }

            var proteins = proteinMap.Values.ToList();

            return(proteins);
        }
Exemple #20
0
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            XElement root = XElement.Load(fileName);
            var      name = root.FindElement("AnalysisSoftwareList").
                            FindElement("AnalysisSoftware").
                            FindElement("SoftwareName").
                            FindElement("cvParam").Attribute("name").Value;

            var defaultExp = Path.GetFileNameWithoutExtension(fileName);

            foreach (var ext in extensions)
            {
                if (defaultExp.ToLower().EndsWith(ext))
                {
                    defaultExp = defaultExp.Substring(0, defaultExp.Length - ext.Length);
                }
            }

            //parsing identification protocol first
            var protocols = root.FindElement("AnalysisProtocolCollection");
            var sip       = protocols.FindElement("SpectrumIdentificationProtocol");
            var modMap    = ParseSearchModificationMap(sip.FindElement("ModificationParams"));
            var proteases = ParseEnzymes(sip.FindElement("Enzymes"));
            var protease  = proteases.FirstOrDefault();

            //parsing sequence collection, including protein<->peptide map
            var seqs       = root.FindElement("SequenceCollection");
            var proteinMap = (from ele in seqs.FindElements("DBSequence")
                              let id = ele.Attribute("id").Value
                                       let accession = ParseAccession(ele.Attribute("accession").Value)
                                                       let db = ele.Attribute("searchDatabase_ref").Value
                                                                select new { Id = id, Accession = accession, DB = db }).ToDictionary(m => m.Id);

            var peptideMap = (from ele in seqs.FindElements("Peptide")
                              let id = ele.Attribute("id").Value
                                       let seq = ele.FindElement("PeptideSequence").Value
                                                 let mods = (from modEle in ele.FindElements("Modification")
                                                             let mod = ParseModification(modEle, modMap)
                                                                       where mod != null
                                                                       orderby mod.Location descending
                                                                       select mod).ToArray()
                                                            let numMiss = protease == null ? 0 : protease.GetMissCleavageSiteCount(seq)
                                                                          select new MzIdentPeptideItem()
            {
                Id = id,
                PureSequence = seq,
                Modifications = mods,
                Sequence = GetModifiedSequence(seq, mods),
                NumMissCleavage = numMiss
            }).ToDictionary(m => m.Id);

            var peptideEvidenceMap = (from g in
                                      (from ele in seqs.FindElements("PeptideEvidence")
                                       select new MzIdentPeptideEvidenceItem()
            {
                Id = ele.Attribute("id").Value,
                PeptideRef = ele.Attribute("peptide_ref").Value,
                DbRef = ele.Attribute("dBSequence_ref").Value,
                Pre = ele.Attribute("pre").Value,
                Post = ele.Attribute("post").Value
            }).GroupBy(m => m.Id)
                                      select g.First()).ToDictionary(m => m.Id);

            //now parsing data
            var data = root.FindElement("DataCollection");

            var result       = new List <IIdentifiedSpectrum>();
            var analysisData = data.FindElement("AnalysisData");
            var idList       = analysisData.FindElement("SpectrumIdentificationList");

            foreach (var sir in idList.FindElements("SpectrumIdentificationResult"))
            {
                var items = FilterItems(sir.FindElements("SpectrumIdentificationItem"), peptideMap, peptideEvidenceMap);

                if (items.Count == 0)
                {
                    continue;
                }

                var spectrum = new IdentifiedSpectrum();
                result.Add(spectrum);

                var spectrumId = sir.Attribute("spectrumID").Value;

                var    sirCvParams = GetCvParams(sir);
                string value;
                if (sirCvParams.TryGetValue("MS:1000796", out value))
                {
                    spectrum.Query.FileScan = TitleParser.GetValue(value);
                }
                else
                {
                    if (spectrumId.StartsWith("index=") || spectrumId.StartsWith("scan="))
                    {
                        spectrum.Query.FileScan.Experimental = defaultExp;
                        spectrum.Query.FileScan.FirstScan    = int.Parse(spectrumId.StringAfter("="));
                        spectrum.Query.FileScan.LastScan     = spectrum.Query.FileScan.FirstScan;
                    }
                    else
                    {
                        spectrum.Query.FileScan.Experimental = spectrumId;
                    }
                }

                if (sirCvParams.TryGetValue("MS:1001115", out value))
                {
                    spectrum.Query.FileScan.FirstScan = int.Parse(value);
                }

                if (spectrum.Query.FileScan.FirstScan == 0)
                {
                    throw new Exception(string.Format("Cannot find scan information in file {0}", fileName));
                }

                bool bFirst = true;

                foreach (var sit in items)
                {
                    if (bFirst) //only parse score once
                    {
                        spectrum.Id             = sit.Attribute("id").Value;
                        spectrum.Charge         = int.Parse(sit.Attribute("chargeState").Value);
                        spectrum.TheoreticalMH  = PrecursorUtils.MzToMH(double.Parse(sit.Attribute("calculatedMassToCharge").Value), spectrum.Charge, true);
                        spectrum.ExperimentalMH = PrecursorUtils.MzToMH(double.Parse(sit.Attribute("experimentalMassToCharge").Value), spectrum.Charge, true);

                        var cvParams = GetCvParams(sit);
                        if (cvParams.TryGetValue("MS:1001121", out value))
                        {
                            spectrum.MatchedIonCount = int.Parse(value);
                        }

                        if (cvParams.TryGetValue("MS:1001362", out value))
                        {
                            spectrum.TheoreticalIonCount = int.Parse(value) + spectrum.MatchedIonCount;
                        }

                        ParseScore(spectrum, cvParams);

                        var userParams = GetUserParams(sit);
                        ParseUserParams(spectrum, userParams);

                        bFirst = false;
                    }

                    var peptide = new IdentifiedPeptide(spectrum);
                    var pep_ref = sit.Attribute("peptide_ref").Value;
                    var pep     = peptideMap[pep_ref];
                    spectrum.Modifications = (from m in pep.Modifications
                                              select string.Format("{0}:{1}", m.Location, m.Item.Name)).Reverse().Merge(",");
                    spectrum.NumMissedCleavages = pep.NumMissCleavage;

                    foreach (var per in sit.FindElements("PeptideEvidenceRef"))
                    {
                        var pe_ref = per.Attribute("peptideEvidence_ref").Value;
                        var pe     = peptideEvidenceMap[pe_ref];
                        peptide.Sequence = pe.Pre + "." + pep.Sequence + "." + pe.Post;

                        var protein = proteinMap[pe.DbRef];
                        peptide.AddProtein(protein.Accession);
                    }
                }
            }

            return(result);
        }
Exemple #21
0
        /// <summary>
        /// Parses the peptide hits of a Mascot result file into a map from query id to the
        /// identified spectra kept for that query.
        /// </summary>
        /// <param name="datFilename">Path of the result file; it is opened twice with a StreamReader.</param>
        /// <param name="minRank">Hits are collected until the running rank exceeds this value, so
        /// despite its name it acts as the highest rank that is kept.</param>
        /// <param name="minScore">The scan of a query stops at the first hit scoring below this value.</param>
        /// <param name="isDecoy">When true the "decoy_"-prefixed sections are read and every protein
        /// name is prefixed with DECOY_PREFIX.</param>
        /// <returns>
        /// A dictionary with one entry per query id from 1 to the "queries" header value; each value
        /// is the (possibly empty) list of spectra kept for the query. The dictionary is empty when
        /// <paramref name="isDecoy"/> is true but the file's DECOY parameter is not "1".
        /// </returns>
        /// <exception cref="UserTerminatedException">Progress reports a pending cancellation.</exception>
        /// <exception cref="Exception">A peptide line does not match peptideRegex, or the
        /// "_terms" entry of a hit is missing.</exception>
        public Dictionary <int, List <IIdentifiedSpectrum> > DoParsePeptides(string datFilename, int minRank, double minScore, bool isDecoy)
        {
            var result = new Dictionary <int, List <IIdentifiedSpectrum> >();

            Dictionary <string, string> headers;
            int queryCount;
            Dictionary <int, MascotQueryItem> queryItems;
            Dictionary <string, string>       peptideSection;

            var prefix = isDecoy ? "decoy_" : "";

            // First pass: read the global sections (parameters, masses, enzyme, header,
            // query summary and the peptide section) in the order they are requested here.
            using (var sr = new StreamReader(datFilename))
            {
                InitializeBoundary(sr);

                CurrentParameters = ParseSection(sr, "parameters");

                var hasDecoy = CurrentParameters.ContainsKey("DECOY") && CurrentParameters["DECOY"].Equals("1");

                // Decoy hits were requested but the search was not run with DECOY=1.
                if (!hasDecoy && isDecoy)
                {
                    return(result);
                }

                var masses = ParseSection(sr, "masses");

                CurrentModifications = ParseModification(masses);

                // Remember the reader position so it can be restored after ParseEnzyme.
                long curPos = sr.GetCharpos();

                CurrentProtease = ParseEnzyme(sr);

                sr.SetCharpos(curPos);

                headers    = ParseSection(sr, "header");
                queryCount = int.Parse(headers["queries"]);

                queryItems     = ParseQueryItems(sr, queryCount, prefix);
                peptideSection = ParseSection(sr, prefix + "peptides", !isDecoy);
            }

            string file = CurrentParameters["FILE"];

            if (file.StartsWith("File Name: "))
            {
                // "File Name: " is 11 characters long; Substring(10) keeps the separating
                // space, which Trim() then removes.
                file = file.Substring(10).Trim();
            }
            // Fallback experiment name, used below when the title parser yields none.
            string defaultExperimental     = FileUtils.ChangeExtension(new FileInfo(file).Name, "");
            bool   isPrecursorMonoisotopic = true;

            if (CurrentParameters.ContainsKey("MASS"))
            {
                isPrecursorMonoisotopic = CurrentParameters["MASS"].Equals("Monoisotopic");
            }

            // Second pass: the per-query sections ("query1", "query2", ...) are read in
            // ascending order to obtain each query's title.
            using (var sr = new StreamReader(datFilename))
            {
                //Progress.SetRange(1, queryCount);
                for (int queryId = 1; queryId <= queryCount; queryId++)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    //Progress.SetPosition(queryId);

                    MascotQueryItem queryItem = queryItems[queryId];

                    var iPeps = new List <IIdentifiedSpectrum>();
                    result[queryId] = iPeps;

                    // lastHit is the most recently created spectrum of this query; hits with an
                    // identical score are merged into it instead of creating a new spectrum.
                    IIdentifiedSpectrum lastHit = null;
                    int rank = 0;
                    // At most ten candidates (keys q{id}_p1 .. q{id}_p10) are examined per query.
                    for (int k = 1; k <= 10; k++)
                    {
                        string key = "q" + queryId + "_p" + k;
                        if (!peptideSection.ContainsKey(key))
                        {
                            // No further candidate: the last kept hit gets a delta score of 1.0.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0;
                            }
                            break;
                        }

                        string line = peptideSection[key];
                        // A null value or "-1" is handled like a missing candidate.
                        if (line == null || line.Equals("-1"))
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0;
                            }
                            break;
                        }

                        Match mDetail = this.peptideRegex.Match(line);
                        if (!mDetail.Success)
                        {
                            throw new Exception("Wrong format of peptides : " + line);
                        }

                        double score = MyConvert.ToDouble(mDetail.Groups["Score"].Value);
                        if (score < minScore)
                        {
                            // The rejected hit still determines the delta score of the previous one.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }
                            break;
                        }

                        // Exact score equality means the hit shares the rank of the previous hit.
                        bool bSameRank = null != lastHit && score == lastHit.Score;
                        if (!bSameRank)
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }

                            rank++;
                            // Stop once the rank limit is exceeded; this hit is not kept.
                            if (rank > minRank)
                            {
                                break;
                            }
                        }

                        IIdentifiedSpectrum mphit;
                        if (bSameRank)
                        {
                            mphit = lastHit;
                        }
                        else
                        {
                            mphit = new IdentifiedSpectrum();
                            mphit.IsPrecursorMonoisotopic = isPrecursorMonoisotopic;

                            mphit.Rank = rank;
                            mphit.NumMissedCleavages = int.Parse(mDetail.Groups["MissCleavage"].Value);
                            mphit.TheoreticalMass    = MyConvert.ToDouble(mDetail.Groups["TheoreticalMass"].Value);
                            mphit.ExperimentalMass   = queryItem.ExperimentalMass;
                            mphit.Score       = score;
                            mphit.ExpectValue = ExpectValueCalculator.Calc(mphit.Score, queryItem.MatchCount, 0.05);

                            mphit.Query.QueryId    = queryId;
                            mphit.Query.ObservedMz = queryItem.Observed;
                            mphit.Query.Charge     = queryItem.Charge;
                            mphit.Query.MatchCount = queryItem.MatchCount;
                            // The homology score is only recorded when it is non-zero.
                            if (queryItem.HomologyScore != 0)
                            {
                                mphit.Annotations[HomologyScoreKey] = queryItem.HomologyScore;
                            }

                            // Starting value for semi-specific proteases; it can only be raised
                            // by the Math.Max call after the protein loop below.
                            if (CurrentProtease.IsSemiSpecific)
                            {
                                mphit.NumProteaseTermini = 1;
                            }

                            lastHit = mphit;
                        }

                        var    pureSeq      = mDetail.Groups["Sequence"].Value;
                        string modification = mDetail.Groups["Modification"].Value;
                        var    seq          = ModifySequence(pureSeq, modification);
                        AssignModification(mphit, modification, CurrentModifications);

                        string proteins         = mDetail.Groups["ProteinNames"].Value;
                        Match  proteinNameMatch = this.proteinNameRegex.Match(proteins);

                        string key_terms = key + "_terms";
                        if (!peptideSection.ContainsKey(key_terms))
                        {
                            throw new Exception("Mascot version is too old. It's not supported.");
                        }

                        string value_terms = peptideSection[key_terms];
                        Match  termsMatch  = this.termsRegex.Match(value_terms);

                        // Protein names and flanking-residue terms are walked in lock step; the
                        // loop ends as soon as either regex runs out of matches.
                        int numProteaseTermini = 0;
                        while (proteinNameMatch.Success && termsMatch.Success)
                        {
                            var fullSeq = MyConvert.Format("{0}.{1}.{2}",
                                                           termsMatch.Groups[1].Value,
                                                           seq,
                                                           termsMatch.Groups[2].Value);

                            var name = proteinNameMatch.Groups[1].Value.Replace("/", "_");
                            if (isDecoy)
                            {
                                name = DECOY_PREFIX + name;
                            }

                            // Reuse an existing peptide of this spectrum with the same flanked
                            // sequence and just add the protein to it.
                            bool findPeptide = false;
                            for (int i = 0; i < mphit.Peptides.Count; i++)
                            {
                                if (mphit.Peptides[i].Sequence == fullSeq)
                                {
                                    mphit.Peptides[i].AddProtein(name);
                                    findPeptide = true;
                                    break;
                                }
                            }

                            if (!findPeptide)
                            {
                                var mp = new IdentifiedPeptide(mphit);
                                mp.Sequence = fullSeq;
                                mp.AddProtein(name);

                                // The termini count is only evaluated for newly created peptides,
                                // not when an existing peptide gains another protein.
                                if (CurrentProtease.IsSemiSpecific)
                                {
                                    int position = Convert.ToInt32(proteinNameMatch.Groups[2].Value);
                                    int count    = CurrentProtease.GetNumProteaseTermini(termsMatch.Groups[1].Value[0], pureSeq, termsMatch.Groups[2].Value[0], '-', position);
                                    numProteaseTermini = Math.Max(numProteaseTermini, count);
                                }
                            }

                            proteinNameMatch = proteinNameMatch.NextMatch();
                            termsMatch       = termsMatch.NextMatch();
                        }

                        if (CurrentProtease.IsSemiSpecific)
                        {
                            mphit.NumProteaseTermini = Math.Max(mphit.NumProteaseTermini, numProteaseTermini);
                        }

                        // A same-rank hit was merged into lastHit, which is already in the list.
                        if (!bSameRank)
                        {
                            iPeps.Add(mphit);
                        }
                    }

                    string query = "query" + queryId;

                    Dictionary <string, string> querySection = ParseSection(sr, query);
                    string title = Uri.UnescapeDataString(querySection["title"]);

                    SequestFilename sf = this.TitleParser.GetValue(title);
                    sf.Charge = queryItem.Charge;

                    if (sf.Experimental == null || sf.Experimental.Length == 0)
                    {
                        sf.Experimental = defaultExperimental;
                    }

                    // All spectra of the query share the same title and the same sf instance.
                    foreach (IIdentifiedSpectrum mp in iPeps)
                    {
                        mp.Query.Title    = title;
                        mp.Query.FileScan = sf;
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// Gets the query/peptide map from a result file organised into "Search", "Total" and
        /// "Spectrum{n}" sections.
        /// NOTE(review): the previous summary called this a "mascot dat file" while the parameter
        /// documentation says pFind; the section and key names read here suggest the latter - confirm.
        /// </summary>
        /// <param name="filename">pFind proteins file</param>
        /// <param name="minRank">Hits are kept until the running rank exceeds this value, so it
        /// effectively is the highest rank of a peptide retained for the same spectrum</param>
        /// <param name="minScore">Minimum score of peptide identified in same spectrum; the scan of a
        /// spectrum stops at the first hit scoring below it (or scoring exactly 0)</param>
        /// <returns>Query/peptide map keyed by spectrum number; spectra whose "ValidCandidate"
        /// value is 0 get no entry</returns>
        /// <exception cref="UserTerminatedException">Progress reports a pending cancellation.</exception>
        public Dictionary <int, List <IIdentifiedSpectrum> > ParsePeptides(string filename, int minRank, double minScore)
        {
            var result = new Dictionary <int, List <IIdentifiedSpectrum> >();

            // Used below as the experiment name when the title parser yields none.
            var sourceDir = GetSourceFile(filename);

            using (var sr = new StreamReader(filename))
            {
                var parameters = ParseSection(sr, "Search");

                var mm = ParseModification(parameters);

                // Give every dynamic modification not seen before the next character from
                // ModificationConsts.MODIFICATION_CHAR (index = current map size + 1).
                foreach (var mod in mm.DynamicModification)
                {
                    if (!this.ModificationCharMap.ContainsKey(mod.Modification))
                    {
                        this.ModificationCharMap[mod.Modification] = ModificationConsts.MODIFICATION_CHAR[this.ModificationCharMap.Count + 1];
                    }
                }

                var headers = ParseSection(sr, "Total");

                var queryCount = int.Parse(headers["Spectra"]);

                Progress.SetRange(1, queryCount);
                for (int queryId = 1; queryId <= queryCount; queryId++)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    Progress.SetPosition(queryId);

                    // Sections are requested in ascending order: "Spectrum1", "Spectrum2", ...
                    var speName        = MyConvert.Format("Spectrum{0}", queryId);
                    var peptideSection = ParseSection(sr, speName);

                    int candidateCount = int.Parse(peptideSection["ValidCandidate"]);
                    // Spectra without candidates are skipped entirely (no entry in result).
                    if (candidateCount == 0)
                    {
                        continue;
                    }

                    var expMH  = MyConvert.ToDouble(peptideSection["MH"]);
                    var expMz  = MyConvert.ToDouble(peptideSection["MZ"]);
                    var charge = int.Parse(peptideSection["Charge"]);

                    var iPeps = new List <IIdentifiedSpectrum>();
                    result[queryId] = iPeps;

                    // lastHit is the most recently created spectrum; hits with an identical
                    // score are merged into it instead of creating a new spectrum.
                    IIdentifiedSpectrum lastHit = null;
                    int rank = 0;
                    for (int k = 1; k <= candidateCount; k++)
                    {
                        // Candidate fields are stored under keys "NO{k}_Score", "NO{k}_SQ", ...
                        string key      = "NO" + k.ToString();
                        var    scoreKey = key + "_Score";
                        if (!peptideSection.ContainsKey(scoreKey))
                        {
                            // No further candidate: the last kept hit gets a delta score of 1.0.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0;
                            }
                            break;
                        }

                        double score = MyConvert.ToDouble(peptideSection[scoreKey]);
                        if (score < minScore || score == 0.0)
                        {
                            // The rejected hit still determines the delta score of the previous one.
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }
                            break;
                        }

                        // Exact score equality means the hit shares the rank of the previous hit.
                        bool bSameRank = null != lastHit && score == lastHit.Score;
                        if (!bSameRank)
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }

                            rank++;
                            // Stop once the rank limit is exceeded; this hit is not kept.
                            if (rank > minRank)
                            {
                                break;
                            }
                        }

                        IIdentifiedSpectrum mphit;
                        if (bSameRank)
                        {
                            mphit = lastHit;
                        }
                        else
                        {
                            mphit = new IdentifiedSpectrum();

                            mphit.Rank        = rank;
                            mphit.Score       = score;
                            mphit.ExpectValue = MyConvert.ToDouble(peptideSection[key + "_EValue"]);

                            // The theoretical value is read from "_MH" when present, else from "_Mass".
                            var mhkey = key + "_MH";
                            if (peptideSection.ContainsKey(mhkey))
                            {
                                mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[mhkey]);
                            }
                            else
                            {
                                mphit.TheoreticalMH = MyConvert.ToDouble(peptideSection[key + "_Mass"]);
                            }

                            // Matched-peak statistics are optional in the section.
                            var micKey = key + "_Matched_Peaks";
                            if (peptideSection.ContainsKey(micKey))
                            {
                                mphit.MatchedIonCount = int.Parse(peptideSection[micKey]);
                                mphit.MatchedTIC      = MyConvert.ToDouble(peptideSection[key + "_Matched_Intensity"]);
                            }

                            var misKey = key + "_MissCleave";
                            if (peptideSection.ContainsKey(misKey))
                            {
                                mphit.NumMissedCleavages = int.Parse(peptideSection[misKey]);
                            }
                            mphit.ExperimentalMH = expMH;
                            // Default delta score; overwritten above when a lower-scoring hit follows.
                            mphit.DeltaScore     = 1.0;

                            mphit.Query.QueryId    = queryId;
                            mphit.Query.ObservedMz = expMz;
                            mphit.Query.Charge     = charge;
                            //mphit.Query.MatchCount = queryItem.MatchCount;

                            lastHit = mphit;
                        }

                        // Every candidate becomes its own peptide of the (possibly shared) spectrum.
                        var mp = new IdentifiedPeptide(mphit);
                        mp.Sequence = peptideSection[key + "_SQ"];

                        string modificationPos  = peptideSection[key + "_Modify_Pos"];
                        string modificationName = peptideSection[key + "_Modify_Name"];

                        Dictionary <int, string> modifications = GetModifications(modificationPos, modificationName);

                        ModifySequence(mp, modifications, mm);
                        AssignModification(mphit, modifications, mm);

                        string proteins = peptideSection[key + "_Proteins"];
                        var    parts    = proteins.Split(',');
                        // Element 0 is skipped - presumably it is a protein count rather than a
                        // name; TODO confirm against the file format.
                        for (int i = 1; i < parts.Count(); i++)
                        {
                            mp.AddProtein(parts[i]);
                        }

                        // A same-rank hit was merged into lastHit, which is already in the list.
                        if (!bSameRank)
                        {
                            iPeps.Add(mphit);
                        }
                    }

                    // Only the file name part of the "Input" value is used as the title.
                    var title = new FileInfo(peptideSection["Input"]).Name;

                    SequestFilename sf = this.TitleParser.GetValue(title);
                    sf.Charge = charge;

                    if (sf.Experimental == null || sf.Experimental.Length == 0)
                    {
                        sf.Experimental = sourceDir;
                    }

                    // Only LongFileName is copied from sf; the spectra keep their own FileScan object.
                    foreach (IIdentifiedSpectrum mp in iPeps)
                    {
                        mp.Query.Title = title;
                        mp.Query.FileScan.LongFileName = sf.LongFileName;
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// Parses one protein entry into an <see cref="IdentifiedProtein"/> and attaches every
        /// peptide hit of that entry that is accepted by the peptide filter.
        /// </summary>
        /// <param name="proteinContent">Text of a single protein entry; it is handed to
        /// GetProtein and GetPeptideInfoContentList.</param>
        /// <returns>The protein produced by GetProtein, with one peptide added per accepted hit.</returns>
        /// <remarks>
        /// Modification names found in the peptide strings are registered in
        /// <c>this.modifications</c> (with a blank character) even when the hit itself is rejected
        /// by the filter. Modification fragments that do not match <c>modificationReg</c> are
        /// ignored instead of registering an empty name.
        /// </remarks>
        protected IdentifiedProtein ParseProtein(String proteinContent)
        {
            IdentifiedProtein result = GetProtein(proteinContent);

            List <String> peptideInfoContentList = GetPeptideInfoContentList(proteinContent);

            foreach (String peptideInfoContent in peptideInfoContentList)
            {
                List <String> peptideInfo = GetPeptideInfo(peptideInfoContent);
                if (0 == peptideInfo.Count)
                {
                    continue;
                }

                IIdentifiedSpectrum mphit = new IdentifiedSpectrum();

                // Group 0 : peptide mass from observed m/z
                mphit.ExperimentalMass = MyConvert.ToDouble(peptideInfo[0]);

                // Group 1 : observed m/z
                mphit.Query.ObservedMz = MyConvert.ToDouble(peptideInfo[1]);

                // Group 2 : charge
                int charge = int.Parse(peptideInfo[2]);
                mphit.Query.Charge = charge;

                // Group 3 : title (URI-escaped in the source text)
                String title = Uri.UnescapeDataString(peptideInfo[3]).Trim();
                mphit.Query.Title = title;

                SequestFilename sf = MascotUtils.ParseTitle(title, charge);
                if (sf != null)
                {
                    mphit.Query.FileScan.LongFileName = sf.LongFileName;
                }

                // Group 4 : query
                mphit.Query.QueryId = int.Parse(peptideInfo[4]);

                // Group 5 equals Group 1
                // Group 6 equals Group 0

                // Group 7 : calculated peptide mass
                mphit.TheoreticalMass = MyConvert.ToDouble(peptideInfo[7]);

                // Group 8 : difference between observed and calculated peptide mass (unused)

                // Group 9 : miss cleavage
                mphit.NumMissedCleavages = int.Parse(peptideInfo[9]);

                // Group 10: score (parsed as an integer)
                mphit.Score = int.Parse(peptideInfo[10]);

                // Group 11: expect p value
                mphit.ExpectValue = MyConvert.ToDouble(peptideInfo[11]);

                // Group 12: rank
                mphit.Rank = int.Parse(peptideInfo[12]);

                // Group 13: peptide sequence, optionally followed by "+ modifications", e.g.
                // &nbsp;K.YEINVLR<u>.</u>N + Label:18O(2) (C-term)
                String seq = peptideInfo[13].Replace("&nbsp;", "");

                var mpep = new IdentifiedPeptide(mphit);

                string[] parts = Regex.Split(seq, "\\+");
                if (parts.Length > 1)
                {
                    seq = parts[0].Trim();

                    string modificationText = parts[1].Trim();
                    mphit.Modifications = modificationText;

                    string[] mods = modificationText.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (string mod in mods)
                    {
                        Match m = this.modificationReg.Match(mod.Trim());
                        if (!m.Success)
                        {
                            // A failed match has empty groups; registering "" as a
                            // modification name would pollute the map.
                            continue;
                        }

                        string modificationName = m.Groups[1].Value;
                        if (!this.modifications.ContainsKey(modificationName))
                        {
                            this.modifications[modificationName] = ' ';
                        }
                    }
                }

                mpep.Sequence = seq;

                // Only hits accepted by the filter are linked to the protein.
                if (GetPeptideFilter().Accept(mphit))
                {
                    mpep.AddProtein(result.Name);
                    result.Peptides.Add(mpep);
                }
            }

            return(result);
        }
        /// <summary>
        /// Reads proteins and their peptides from the first worksheet of an Excel workbook.
        /// </summary>
        /// <param name="fileName">Path of the workbook to open.</param>
        /// <returns>One protein per distinct value found in column B of the peptide rows, each
        /// carrying the peptides read for it.</returns>
        /// <remarks>
        /// The workbook is closed without saving and the Excel application started for the
        /// import is quit afterwards, including when parsing fails, so that no Excel process
        /// is left behind.
        /// </remarks>
        public override List <IIdentifiedProtein> ParseProteins(string fileName)
        {
            Dictionary <string, IIdentifiedProtein> proteinMap = new Dictionary <string, IIdentifiedProtein>();

            Application xApp = new Application();
            try
            {
                // Get the Workbook object; of the two possible ways, this one opens an existing file.
                Workbook xBook = xApp.Workbooks._Open(fileName,
                                                      Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                      , Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                      , Missing.Value, Missing.Value, Missing.Value, Missing.Value);

                try
                {
                    Worksheet xSheet = (Worksheet)xBook.Sheets[1];

                    int fromRow = 2;
                    int endRow  = fromRow;

                    // The data ends at the first row whose column B is empty. The sheet's row
                    // count is fetched once instead of on every iteration.
                    int maxRow = xSheet.Rows.Count;
                    for (; endRow <= maxRow; endRow++)
                    {
                        string b = xSheet.Value('B', endRow);
                        if (null == b)
                        {
                            break;
                        }
                    }
                    endRow--;

                    Progress.SetRange(fromRow, endRow);
                    Progress.SetMessage("Parsing file ...");
                    for (int i = fromRow; i <= endRow; i++)
                    {
                        Progress.SetPosition(i);

                        string seq = xSheet.Value('A', i);
                        if (null == seq) // protein information row
                        {
                            continue;
                        }

                        string deltaCn = xSheet.Value('I', i);
                        if (null == deltaCn) // rank > 1
                        {
                            continue;
                        }

                        string protein = xSheet.Value('B', i);
                        if (!proteinMap.ContainsKey(protein))
                        {
                            var p = new IdentifiedProtein(protein);

                            // Protein-level values are read two rows below the first peptide row
                            // of the protein. NOTE(review): layout assumption, confirm against
                            // a sample export.
                            p.Coverage         = MyConvert.ToDouble(xSheet.Value('C', i + 2));
                            p.MolecularWeight  = MyConvert.ToDouble(xSheet.Value('F', i + 2)) * 1000;
                            p.IsoelectricPoint = MyConvert.ToDouble(xSheet.Value('G', i + 2));
                            p.Score            = MyConvert.ToDouble(xSheet.Value('H', i + 2));
                            p.Description      = xSheet.Value('I', i + 2);

                            proteinMap[protein] = p;
                        }

                        var pro = proteinMap[protein];

                        IdentifiedSpectrum spectrum = new IdentifiedSpectrum();
                        IdentifiedPeptide  peptide  = new IdentifiedPeptide(spectrum);
                        peptide.Sequence = seq.ToUpper();
                        peptide.AddProtein(protein);
                        spectrum.Modifications               = xSheet.Value('F', i);
                        spectrum.DeltaScore                  = MyConvert.ToDouble(deltaCn);
                        spectrum.Charge                      = Convert.ToInt32(xSheet.Value('K', i));
                        spectrum.ObservedMz                  = MyConvert.ToDouble(xSheet.Value('L', i));
                        spectrum.TheoreticalMH               = MyConvert.ToDouble(xSheet.Value('M', i));
                        spectrum.Ions                        = xSheet.Value('S', i);
                        spectrum.Query.FileScan.FirstScan    = Convert.ToInt32(xSheet.Value('P', i));
                        spectrum.Query.FileScan.LastScan     = Convert.ToInt32(xSheet.Value('Q', i));
                        spectrum.Query.FileScan.Experimental = FileUtils.RemoveAllExtension(xSheet.Value('T', i));

                        pro.Peptides.Add(peptide);
                    }
                }
                finally
                {
                    // Close without saving changes.
                    xBook.Close(false, Type.Missing, Type.Missing);
                }
            }
            finally
            {
                // Shut down the Excel instance created above; otherwise it keeps running.
                xApp.Quit();
            }

            var proteins = proteinMap.Values.ToList();

            return(proteins);
        }
Exemple #25
0
        /// <summary>
        /// Reads identified spectra from a search result XML file that contains an
        /// "MSSearch_request" and an "MSSearch_response" element.
        /// </summary>
        /// <param name="fileName">Path of the XML file to load.</param>
        /// <returns>
        /// One spectrum per "MSHitSet" that has an "MSHitSet_hits" child. Each spectrum keeps only
        /// the peptides of the hits sharing the lowest e-value of its hit set.
        /// </returns>
        /// <remarks>
        /// All numbers are parsed with the invariant culture: the file is machine-generated, so
        /// the decimal separator must not depend on the culture of the current thread.
        /// </remarks>
        public List <IIdentifiedSpectrum> ReadFromFile(string fileName)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            XElement root = XElement.Load(fileName);

            XElement request = root.FindElement("MSSearch_request");

            //parsing identification protocol first
            var modMap   = ParseSearchModificationMap(request.FindFirstDescendant("MSSearchSettings_variable"));
            var protease = ParseProtease(request.FindFirstDescendant("MSSearchSettings_enzyme"));

            // Without a protease no missed cleavages can be counted.
            Func <string, int> missCalc;

            if (protease == null)
            {
                missCalc = m => 0;
            }
            else
            {
                missCalc = m => protease.GetMissCleavageSiteCount(m);
            }

            //parsing sequence collection, including protein<->peptide map
            var result   = new List <IIdentifiedSpectrum>();
            var response = root.FindElement("MSSearch_response");
            // Masses in the hits are divided by this scale factor.
            var scale    = double.Parse(response.FindFirstDescendant("MSResponse_scale").Value, invariant);

            var idList = response.FindFirstDescendant("MSResponse_hitsets");

            foreach (var sir in idList.FindElements("MSHitSet"))
            {
                var hits = sir.FindElement("MSHitSet_hits");
                if (hits == null)
                {
                    continue;
                }

                var spectrum = new IdentifiedSpectrum();
                result.Add(spectrum);

                var title = sir.FindElement("MSHitSet_ids").FindElement("MSHitSet_ids_E").Value;
                spectrum.Query.FileScan = this.TitleParser.GetValue(title);

                foreach (var hit in hits.FindElements("MSHits"))
                {
                    var evalue = double.Parse(hit.FindElement("MSHits_evalue").Value, invariant);
                    if (spectrum.Peptides.Count > 0)
                    {
                        // A worse hit is ignored, a strictly better one replaces what was
                        // collected so far, and an equal one is added alongside.
                        if (evalue > spectrum.ExpectValue)
                        {
                            continue;
                        }
                        if (evalue < spectrum.ExpectValue)
                        {
                            spectrum.ClearPeptides();
                        }
                    }
                    spectrum.ExpectValue = evalue;
                    spectrum.Score       = -Math.Log(spectrum.ExpectValue);
                    if (spectrum.Query.Charge == 0) // trust the charge from title
                    {
                        spectrum.Query.Charge = int.Parse(hit.FindElement("MSHits_charge").Value, invariant);
                    }
                    spectrum.ExperimentalMass = double.Parse(hit.FindElement("MSHits_mass").Value, invariant) / scale;
                    spectrum.TheoreticalMass  = double.Parse(hit.FindElement("MSHits_theomass").Value, invariant) / scale;

                    var peptide = new IdentifiedPeptide(spectrum);
                    var seq     = hit.FindElement("MSHits_pepstring").Value;
                    spectrum.NumMissedCleavages = missCalc(seq);

                    var mods = hit.FindElement("MSHits_mods");
                    if (mods != null)
                    {
                        // Sites are processed from the right-most one so that inserting a
                        // marker does not shift the positions still to be handled.
                        var modsloc = (from ele in mods.FindElements("MSModHit")
                                       let loc = int.Parse(ele.FindElement("MSModHit_site").Value, invariant)
                                       let modtype = ele.FindElement("MSModHit_modtype").FindElement("MSMod").Value
                                       orderby loc descending
                                       select new { Location = loc, ModType = modtype }).ToList();
                        foreach (var modloc in modsloc)
                        {
                            // The marker goes right after the modified residue.
                            seq = seq.Insert(modloc.Location + 1, modMap[modloc.ModType]);
                        }
                    }

                    peptide.Sequence = hit.FindElement("MSHits_pepstart").Value + "." + seq + "." + hit.FindElement("MSHits_pepstop").Value;

                    foreach (var pep in hit.FindElement("MSHits_pephits").FindElements("MSPepHit"))
                    {
                        // The protein name is the defline up to the first space or tab.
                        var proteinName = pep.FindElement("MSPepHit_defline").Value.StringBefore(" ").StringBefore("\t");
                        peptide.AddProtein(proteinName);
                    }
                }
            }

            return(result);
        }