/// <summary>
/// Extracts peptide-spectrum match (PSM) data from parsed search results.
/// </summary>
/// <remarks>
/// Only <see cref="SearchAlgorithm.XTandem"/> results are handled by this method; for any
/// other algorithm an empty collection is returned. All numeric attributes are parsed with
/// the invariant culture so that values such as "1.5" or "1.2e-05" are read the same way
/// regardless of the machine's regional settings.
/// </remarks>
/// <param name="results">Root element of the search results XML.</param>
/// <param name="searchAlgorithm">The search engine that produced <paramref name="results"/>.</param>
/// <returns>The extracted PSMs, added to the collection under their id.</returns>
public static PsmDataCollection ExtractPsmData(XElement results, SearchAlgorithm searchAlgorithm)
{
    // Fully qualified so that this method does not depend on a "using System.Globalization;"
    // directive being present in the file.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PsmDataCollection psms = new PsmDataCollection();

    if (searchAlgorithm != SearchAlgorithm.XTandem)
    {
        return psms;
    }

    foreach (XElement groupElement in results.Descendants("group"))
    {
        // Groups without a "protein" child carry no PSM and are skipped.
        XElement protein = groupElement.Element("protein");
        if (protein == null)
        {
            continue;
        }

        PsmData psm = new PsmData();
        psm.Id = Convert.ToInt32(groupElement.Attribute("id").Value, invariant);
        psm.Decoy = groupElement.Attribute("label").Value.StartsWith("DECOY_", StringComparison.Ordinal);

        // A "group" may list more than one protein, which just means the peptide is not
        // unique to a single protein. The scoring and modifications are identical (it is the
        // same PSM), so only the first protein is read.
        XElement domain = protein.Element("peptide").Element("domain");

        psm.Seq = domain.Attribute("seq").Value;
        psm.Start = Convert.ToInt32(domain.Attribute("start").Value, invariant);
        psm.End = Convert.ToInt32(domain.Attribute("end").Value, invariant);
        psm.Hyperscore = Convert.ToDouble(domain.Attribute("hyperscore").Value, invariant);
        psm.ExpectationValue = Convert.ToDouble(domain.Attribute("expect").Value, invariant);

        // Mass drift in parts per million: (group mh - domain mh) / domain mh * 1e6.
        // A missing group-level "mh" attribute is read as 0, because Convert.ToDouble
        // returns 0 for a null string.
        double groupMh = Convert.ToDouble(groupElement.Attribute("mh")?.Value, invariant);
        double domainMh = Convert.ToDouble(domain.Attribute("mh").Value, invariant);
        psm.MassDrift = (groupMh - domainMh) / domainMh * 1e6;

        psm.Charge = Convert.ToInt32(groupElement.Attribute("z").Value, invariant);
        psm.MissedCleavages = GetMissedCleavages(psm.Seq);

        // Add the modifications, if there are any. Elements() never returns null; it
        // yields an empty sequence when there are no "aa" children.
        foreach (XElement aa in domain.Elements("aa"))
        {
            Modification mod = new Modification();

            // Subtracting the peptide start converts "at" into a zero-based index
            // into the peptide sequence.
            mod.Loc = Convert.ToInt32(aa.Attribute("at").Value, invariant) - psm.Start;
            mod.AA = aa.Attribute("type").Value;
            mod.Mass = Convert.ToDouble(aa.Attribute("modified").Value, invariant);
            psm.Mods.Add(mod);
        }

        psms.Add(psm.Id, psm);
    }

    return psms;
}
/// <summary>
/// Extracts peptide-spectrum match (PSM) data from parsed search results.
/// </summary>
/// <remarks>
/// Results from <see cref="SearchAlgorithm.XTandem"/> and <see cref="SearchAlgorithm.IdentiPy"/>
/// are handled; for any other algorithm an empty collection is returned. All numeric
/// attributes are parsed with the invariant culture so that values such as "1.5" or
/// "1.2e-05" are read the same way regardless of the machine's regional settings.
/// </remarks>
/// <param name="results">Root element of the search results XML.</param>
/// <param name="searchAlgorithm">The search engine that produced <paramref name="results"/>.</param>
/// <returns>The extracted PSMs, added to the collection under their id.</returns>
public static PsmDataCollection ExtractPsmData(XElement results, SearchAlgorithm searchAlgorithm)
{
    // Fully qualified so that this method does not depend on a "using System.Globalization;"
    // directive being present in the file.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    PsmDataCollection psms = new PsmDataCollection();

    if (searchAlgorithm == SearchAlgorithm.XTandem)
    {
        foreach (XElement groupElement in results.Descendants("group"))
        {
            // Groups without a "protein" child carry no PSM and are skipped.
            XElement protein = groupElement.Element("protein");
            if (protein == null)
            {
                continue;
            }

            PsmData psm = new PsmData();
            psm.Id = Convert.ToInt32(groupElement.Attribute("id").Value, invariant);
            psm.Decoy = groupElement.Attribute("label").Value.StartsWith("DECOY_", StringComparison.Ordinal);

            // A "group" may list more than one protein, which just means the peptide is not
            // unique to a single protein. The scoring and modifications are identical (it is
            // the same PSM), so only the first protein is read.
            XElement domain = protein.Element("peptide").Element("domain");

            psm.Seq = domain.Attribute("seq").Value;
            psm.Start = Convert.ToInt32(domain.Attribute("start").Value, invariant);
            psm.End = Convert.ToInt32(domain.Attribute("end").Value, invariant);
            psm.Hyperscore = Convert.ToDouble(domain.Attribute("hyperscore").Value, invariant);
            psm.ExpectationValue = Convert.ToDouble(domain.Attribute("expect").Value, invariant);

            // Mass drift in parts per million: (group mh - domain mh) / domain mh * 1e6.
            // A missing group-level "mh" attribute is read as 0, because Convert.ToDouble
            // returns 0 for a null string.
            double groupMh = Convert.ToDouble(groupElement.Attribute("mh")?.Value, invariant);
            double domainMh = Convert.ToDouble(domain.Attribute("mh").Value, invariant);
            psm.MassDrift = (groupMh - domainMh) / domainMh * 1e6;

            psm.Charge = Convert.ToInt32(groupElement.Attribute("z").Value, invariant);
            psm.MissedCleavages = GetMissedCleavages(psm.Seq);

            // Add the modifications, if there are any. Elements() never returns null; it
            // yields an empty sequence when there are no "aa" children.
            foreach (XElement aa in domain.Elements("aa"))
            {
                Modification mod = new Modification();

                // Subtracting the peptide start converts "at" into a zero-based index
                // into the peptide sequence.
                mod.Loc = Convert.ToInt32(aa.Attribute("at").Value, invariant) - psm.Start;
                mod.AA = aa.Attribute("type").Value;
                mod.Mass = Convert.ToDouble(aa.Attribute("modified").Value, invariant);
                psm.Mods.Add(mod);
            }

            psms.Add(psm.Id, psm);
        }
    }
    else if (searchAlgorithm == SearchAlgorithm.IdentiPy)
    {
        XNamespace nsp = "http://regis-web.systemsbiology.net/pepXML";

        // First build a lookup of modification masses for the IdentiPy results.
        // Keys are the amino acid mass after modification, which is what IdentiPy reports
        // on each hit; values are the mass difference, which is what is given in the
        // mass@aa arguments to the CLI.
        XElement summary = results.Descendants(nsp + "search_summary").First();
        Dictionary<double, double> modInfo = new Dictionary<double, double>();

        var declaredMods = summary.Elements(nsp + "aminoacid_modification")
            .Concat(summary.Elements(nsp + "terminal_modification"));

        foreach (XElement declared in declaredMods)
        {
            double modifiedMass = Convert.ToDouble(declared.Attribute("mass").Value, invariant);
            double massDiff = Convert.ToDouble(declared.Attribute("massdiff").Value, invariant);

            // Indexer assignment instead of Add: a summary that lists the same modified
            // mass more than once must not abort the whole extraction. The last entry wins.
            modInfo[modifiedMass] = massDiff;
        }

        // Now the PSM data itself can be parsed.
        foreach (XElement query in results.Descendants(nsp + "spectrum_query"))
        {
            // A spectrum query without a search hit carries no PSM and is skipped, in the
            // same way the X!Tandem branch skips groups without a protein.
            XElement searchHit = query.Element(nsp + "search_result")?.Element(nsp + "search_hit");
            if (searchHit == null)
            {
                continue;
            }

            PsmData psm = new PsmData();
            psm.Id = Convert.ToInt32(query.Attribute("index").Value, invariant);
            psm.Decoy = searchHit.Attribute("protein").Value.StartsWith("DECOY_", StringComparison.Ordinal);
            psm.Seq = searchHit.Attribute("peptide").Value;

            // No peptide start/end positions are read for IdentiPy hits; -1 marks them as unset.
            psm.Start = -1;
            psm.End = -1;

            XElement hyperscore = searchHit.Elements(nsp + "search_score")
                .First(score => (string)score.Attribute("name") == "hyperscore");
            XElement expect = searchHit.Elements(nsp + "search_score")
                .First(score => (string)score.Attribute("name") == "expect");
            psm.Hyperscore = Convert.ToDouble(hyperscore.Attribute("value").Value, invariant);
            psm.ExpectationValue = Convert.ToDouble(expect.Attribute("value").Value, invariant);

            // Mass drift in parts per million: massdiff / precursor_neutral_mass * 1e6.
            psm.MassDrift = Convert.ToDouble(searchHit.Attribute("massdiff").Value, invariant)
                / Convert.ToDouble(query.Attribute("precursor_neutral_mass").Value, invariant)
                * 1e6;

            psm.Charge = Convert.ToInt32(query.Attribute("assumed_charge").Value, invariant);
            psm.MissedCleavages = GetMissedCleavages(psm.Seq);

            // Add the modifications, if there are any. The lookups into modInfo rely on the
            // mass written on the hit parsing to exactly the same double as the mass declared
            // in the search summary; an undeclared mass raises KeyNotFoundException.
            XElement modificationInfo = searchHit.Element(nsp + "modification_info");
            if (modificationInfo != null)
            {
                XAttribute nTermMass = modificationInfo.Attribute("mod_nterm_mass");
                if (nTermMass != null)
                {
                    Modification nTermMod = new Modification();
                    nTermMod.Loc = 0; // it's the N-terminus
                    nTermMod.AA = psm.Seq[0].ToString();
                    nTermMod.Mass = modInfo[Convert.ToDouble(nTermMass.Value, invariant)];
                    psm.Mods.Add(nTermMod);
                }

                foreach (XElement aa in modificationInfo.Elements(nsp + "mod_aminoacid_mass"))
                {
                    Modification mod = new Modification();

                    // "position" is one-based; convert it to a zero-based index of the peptide.
                    mod.Loc = Convert.ToInt32(aa.Attribute("position").Value, invariant) - 1;
                    mod.AA = psm.Seq[mod.Loc].ToString();
                    mod.Mass = modInfo[Convert.ToDouble(aa.Attribute("mass").Value, invariant)];
                    psm.Mods.Add(mod);
                }
            }

            psms.Add(psm.Id, psm);
        }
    }

    return psms;
}