Пример #1
0
        /// <summary>
        /// Extracts peptide-spectrum match (PSM) records from a search results XML tree.
        /// </summary>
        /// <param name="results">Element whose descendants contain the search results.</param>
        /// <param name="searchAlgorithm">
        /// Search engine that produced <paramref name="results"/>. Only
        /// <see cref="SearchAlgorithm.XTandem"/> is handled; any other value yields an empty collection.
        /// </param>
        /// <returns>A collection holding one <see cref="PsmData"/> per parsed "group" element, added under its id.</returns>
        /// <remarks>
        /// All numeric attributes are parsed with the invariant culture so the result does not depend on the
        /// culture of the calling thread. Required elements and attributes are not validated here; a document
        /// that lacks them surfaces as an exception from the corresponding dereference.
        /// </remarks>
        public static PsmDataCollection ExtractPsmData(XElement results, SearchAlgorithm searchAlgorithm)
        {
            PsmDataCollection psms = new PsmDataCollection();

            if (searchAlgorithm != SearchAlgorithm.XTandem)
            {
                return psms;
            }

            // The results file is machine-generated, so numbers must never be read with the user's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (XElement group in results.Descendants("group").Where(g => g?.Element("protein") != null))
            {
                PsmData psm = new PsmData();

                psm.Id    = Convert.ToInt32(group.Attribute("id").Value, invariant);
                psm.Decoy = group.Attribute("label").Value.StartsWith("DECOY_", StringComparison.Ordinal);

                // A "group" can list more than one protein, which just means the peptide isn't unique to a
                // single protein. The scoring and modifications are identical (it is the same PSM), so only
                // the first protein is read.
                XElement domain = group.Element("protein").Element("peptide").Element("domain");

                psm.Seq              = domain.Attribute("seq").Value;
                psm.Start            = Convert.ToInt32(domain.Attribute("start").Value, invariant);
                psm.End              = Convert.ToInt32(domain.Attribute("end").Value, invariant);
                psm.Hyperscore       = Convert.ToDouble(domain.Attribute("hyperscore").Value, invariant);
                psm.ExpectationValue = Convert.ToDouble(domain.Attribute("expect").Value, invariant);

                // Relative difference between the group-level and domain-level "mh" values, scaled by 1e6.
                // A missing group-level "mh" attribute is tolerated: Convert.ToDouble maps a null string to 0.
                double groupMh  = Convert.ToDouble(group.Attribute("mh")?.Value, invariant);
                double domainMh = Convert.ToDouble(domain.Attribute("mh").Value, invariant);
                psm.MassDrift   = (groupMh - domainMh) / domainMh * 1e6;

                psm.Charge          = Convert.ToInt32(group.Attribute("z").Value, invariant);
                psm.MissedCleavages = GetMissedCleavages(psm.Seq);

                // Add the modifications, if there are any. Elements() returns an empty sequence (never null)
                // when the domain has no "aa" children, so no guard is needed.
                foreach (XElement aa in domain.Elements("aa"))
                {
                    Modification mod = new Modification();

                    // Convert the reported location to a zero-based index within the peptide.
                    mod.Loc  = Convert.ToInt32(aa.Attribute("at").Value, invariant) - psm.Start;
                    mod.AA   = aa.Attribute("type").Value;
                    mod.Mass = Convert.ToDouble(aa.Attribute("modified").Value, invariant);

                    psm.Mods.Add(mod);
                }

                psms.Add(psm.Id, psm);
            }

            return psms;
        }
Пример #2
0
        /// <summary>
        /// Extracts peptide-spectrum match (PSM) records from a search results XML tree.
        /// </summary>
        /// <param name="results">Element whose descendants contain the search results.</param>
        /// <param name="searchAlgorithm">
        /// Search engine that produced <paramref name="results"/>. <see cref="SearchAlgorithm.XTandem"/> and
        /// <see cref="SearchAlgorithm.IdentiPy"/> are handled; any other value yields an empty collection.
        /// </param>
        /// <returns>A collection holding one <see cref="PsmData"/> per parsed match, added under its id.</returns>
        /// <remarks>
        /// All numeric attributes are parsed with the invariant culture so the result does not depend on the
        /// culture of the calling thread. Required elements and attributes are not validated here; a document
        /// that lacks them surfaces as an exception from the corresponding dereference or lookup.
        /// </remarks>
        public static PsmDataCollection ExtractPsmData(XElement results, SearchAlgorithm searchAlgorithm)
        {
            PsmDataCollection psms = new PsmDataCollection();

            // The results files are machine-generated, so numbers must never be read with the user's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            if (searchAlgorithm == SearchAlgorithm.XTandem)
            {
                foreach (XElement group in results.Descendants("group").Where(g => g?.Element("protein") != null))
                {
                    PsmData psm = new PsmData();

                    psm.Id    = Convert.ToInt32(group.Attribute("id").Value, invariant);
                    psm.Decoy = group.Attribute("label").Value.StartsWith("DECOY_", StringComparison.Ordinal);

                    // A "group" can list more than one protein, which just means the peptide isn't unique to a
                    // single protein. The scoring and modifications are identical (it is the same PSM), so only
                    // the first protein is read.
                    XElement domain = group.Element("protein").Element("peptide").Element("domain");

                    psm.Seq              = domain.Attribute("seq").Value;
                    psm.Start            = Convert.ToInt32(domain.Attribute("start").Value, invariant);
                    psm.End              = Convert.ToInt32(domain.Attribute("end").Value, invariant);
                    psm.Hyperscore       = Convert.ToDouble(domain.Attribute("hyperscore").Value, invariant);
                    psm.ExpectationValue = Convert.ToDouble(domain.Attribute("expect").Value, invariant);

                    // Relative difference between the group-level and domain-level "mh" values, scaled by 1e6.
                    // A missing group-level "mh" attribute is tolerated: Convert.ToDouble maps a null string to 0.
                    double groupMh  = Convert.ToDouble(group.Attribute("mh")?.Value, invariant);
                    double domainMh = Convert.ToDouble(domain.Attribute("mh").Value, invariant);
                    psm.MassDrift   = (groupMh - domainMh) / domainMh * 1e6;

                    psm.Charge          = Convert.ToInt32(group.Attribute("z").Value, invariant);
                    psm.MissedCleavages = GetMissedCleavages(psm.Seq);

                    // Add the modifications, if there are any. Elements() returns an empty sequence (never
                    // null) when the domain has no "aa" children, so no guard is needed.
                    foreach (XElement aa in domain.Elements("aa"))
                    {
                        Modification mod = new Modification();

                        // Convert the reported location to a zero-based index within the peptide.
                        mod.Loc  = Convert.ToInt32(aa.Attribute("at").Value, invariant) - psm.Start;
                        mod.AA   = aa.Attribute("type").Value;
                        mod.Mass = Convert.ToDouble(aa.Attribute("modified").Value, invariant);

                        psm.Mods.Add(mod);
                    }

                    psms.Add(psm.Id, psm);
                }
            }
            else if (searchAlgorithm == SearchAlgorithm.IdentiPy)
            {
                XNamespace nsp = "http://regis-web.systemsbiology.net/pepXML";

                // Build a lookup of the modifications declared in the search summary.
                // Key:   the "mass" attribute (the amino acid mass after modification, which is what the
                //        per-hit modification entries report).
                // Value: the "massdiff" attribute (the mass difference of the modification).
                XElement summary = results.Descendants(nsp + "search_summary").First();
                Dictionary<double, double> modInfo = new Dictionary<double, double>();

                foreach (XElement declared in summary.Elements(nsp + "aminoacid_modification"))
                {
                    modInfo.Add(Convert.ToDouble(declared.Attribute("mass").Value, invariant),
                                Convert.ToDouble(declared.Attribute("massdiff").Value, invariant));
                }

                foreach (XElement declared in summary.Elements(nsp + "terminal_modification"))
                {
                    modInfo.Add(Convert.ToDouble(declared.Attribute("mass").Value, invariant),
                                Convert.ToDouble(declared.Attribute("massdiff").Value, invariant));
                }

                // Now parse out the per-spectrum data.
                foreach (XElement query in results.Descendants(nsp + "spectrum_query"))
                {
                    PsmData psm = new PsmData();

                    psm.Id = Convert.ToInt32(query.Attribute("index").Value, invariant);

                    // Only the first search_hit of the search_result is read.
                    XElement searchHit = query.Element(nsp + "search_result").Element(nsp + "search_hit");

                    psm.Decoy = searchHit.Attribute("protein").Value.StartsWith("DECOY_", StringComparison.Ordinal);
                    psm.Seq   = searchHit.Attribute("peptide").Value;

                    // Start/end positions are not read from this format; -1 is stored as a placeholder.
                    psm.Start = -1;
                    psm.End   = -1;

                    psm.Hyperscore = Convert.ToDouble(
                        searchHit.Elements(nsp + "search_score")
                                 .Where(score => score.Attribute("name").Value == "hyperscore")
                                 .First()
                                 .Attribute("value").Value,
                        invariant);

                    psm.ExpectationValue = Convert.ToDouble(
                        searchHit.Elements(nsp + "search_score")
                                 .Where(score => score.Attribute("name").Value == "expect")
                                 .First()
                                 .Attribute("value").Value,
                        invariant);

                    // Hit "massdiff" relative to the query's "precursor_neutral_mass", scaled by 1e6.
                    psm.MassDrift = Convert.ToDouble(searchHit.Attribute("massdiff").Value, invariant) /
                                    Convert.ToDouble(query.Attribute("precursor_neutral_mass").Value, invariant) * 1e6;

                    psm.Charge          = Convert.ToInt32(query.Attribute("assumed_charge").Value, invariant);
                    psm.MissedCleavages = GetMissedCleavages(psm.Seq);

                    // Add the modifications, if there are any. The modification_info element is optional.
                    XElement modificationInfo = searchHit.Element(nsp + "modification_info");

                    if (modificationInfo != null)
                    {
                        XAttribute nTermMass = modificationInfo.Attribute("mod_nterm_mass");

                        if (nTermMass != null)
                        {
                            Modification mod = new Modification();

                            mod.Loc  = 0; // it's the n-terminus
                            mod.AA   = psm.Seq[0].ToString();
                            mod.Mass = modInfo[Convert.ToDouble(nTermMass.Value, invariant)];

                            psm.Mods.Add(mod);
                        }

                        foreach (XElement aa in modificationInfo.Elements(nsp + "mod_aminoacid_mass"))
                        {
                            Modification mod = new Modification();

                            // Convert the one-based position to a zero-based index within the peptide.
                            mod.Loc  = Convert.ToInt32(aa.Attribute("position").Value, invariant) - 1;
                            mod.AA   = psm.Seq[mod.Loc].ToString();
                            mod.Mass = modInfo[Convert.ToDouble(aa.Attribute("mass").Value, invariant)];

                            psm.Mods.Add(mod);
                        }
                    }

                    psms.Add(psm.Id, psm);
                }
            }

            return psms;
        }