private List <SpectrumMatch> TDPortalReader(InputFile file) { //if neucode labeled, calculate neucode light theoretical AND observed mass! --> better for matching up //if carbamidomethylated, add 57 to theoretical mass (already in observed mass...) aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled) .AA_Masses; List <SpectrumMatch> td_hits = new List <SpectrumMatch>(); List <List <string> > cells = ExcelReader.get_cell_strings(file, true); //This returns the entire sheet except for the header. Each row of cells is one List<string> //get ptms on proteoform -- check for mods. IF not in database, make new topdown mod, show Warning message. Parallel.ForEach(cells, cellStrings => { bool add_topdown_hit = true; //if PTM or accession not found, will not add (show warning) TopDownResultType tdResultType = (cellStrings[15] == "BioMarker") ? TopDownResultType.Biomarker : ((cellStrings[15] == "Tight Absolute Mass") ? TopDownResultType.TightAbsoluteMass : TopDownResultType.Unknown); if (tdResultType != TopDownResultType.Unknown) //uknown result type! { List <Ptm> ptm_list = new List <Ptm>(); // if nothing gets added, an empty ptmlist is passed to the topdownhit constructor. //N-term modifications if (cellStrings[10].Length > 0) //N Terminal Modification Code { string[] ptms = cellStrings[10].Split('|'); //for bottom-up, don't read in ambiguous PSMs if (file.purpose == Purpose.BottomUp && ptms.Length > 1) { add_topdown_hit = false; } foreach (string ptm in ptms) { int position = Int32.TryParse(cellStrings[5], out int i) ? 
i : 0; if (position == 0) { add_topdown_hit = false; continue; } if (cellStrings[10].Split(':')[1] == "1458" ) //PSI-MOD 1458 is supposed to be N-terminal acetylation { ptm_list.Add(new Ptm(position, Sweet.lollipop.theoretical_database.uniprotModifications.Values.SelectMany(m => m) .Where(m => m.OriginalId == "N-terminal Acetyl").FirstOrDefault())); } else { string psimod = ptm.Split(':')[1] .Split('@')[0]; //The number after the @ is the position in the protein while (psimod.Length < 5) { psimod = "0" + psimod; //short part should be the accession number, which is an integer } Modification mod = Sweet.lollipop.theoretical_database.uniprotModifications.Values .SelectMany(m => m).Where(m => m.DatabaseReference != null && m.DatabaseReference.ContainsKey("PSI-MOD") && m.DatabaseReference["PSI-MOD"].Contains(psimod)).FirstOrDefault(); if (mod == null) { psimod = "MOD:" + psimod; mod = Sweet.lollipop.theoretical_database.uniprotModifications.Values .SelectMany(m => m).Where(m => m.DatabaseReference != null && m.DatabaseReference.ContainsKey("PSI-MOD") && m.DatabaseReference["PSI-MOD"].Contains(psimod)).FirstOrDefault(); } if (mod != null) { ptm_list.Add(new Ptm(position, mod)); } else { lock (bad_ptms) { bad_ptms.Add("PSI-MOD:" + psimod + " at " + position); } add_topdown_hit = false; } } } } //don't have example of c-term modification to write code //other mods if (cellStrings[9].Length > 0) //Modification Codes { string[] ptms = cellStrings[9].Split('|'); foreach (string ptm in ptms) { Modification mod = null; string id = ""; if (ptm.Split(':').Length < 2) { add_topdown_hit = false; continue; } if (ptm.Split(':')[1].Split('@').Length < 2) { add_topdown_hit = false; continue; } int position_after_begin = (Int32.TryParse(ptm.Split(':')[1].Split('@')[1], out int j) ? 
j : -1) + 1; //one based sequence //they give position # as from begin site -> want to report in terms of overall sequence #'s //begin + position from begin - 1 => position in overall sequence if (position_after_begin == 0) { add_topdown_hit = false; continue; } int begin = Int32.TryParse(cellStrings[5], out int k) ? k : 0; if (begin == 0) { add_topdown_hit = false; continue; } int position = begin + position_after_begin - 1; if (ptm.Split(':')[0] == "RESID") { string resid = ptm.Split(':')[1] .Split('@')[0]; //The number after the @ is the position in the protein while (resid.Length < 4) { resid = "0" + resid; //short part should be the accession number, which is an integer } resid = "AA" + resid; id = "RESID:" + resid; mod = Sweet.lollipop.theoretical_database.uniprotModifications.Values.SelectMany(m => m) .Where(m => m.DatabaseReference != null && m.DatabaseReference.ContainsKey("RESID") && m.DatabaseReference["RESID"].Contains(resid)).FirstOrDefault(); } else if (ptm.Split(':')[0] == "PSI-MOD") { string psimod = ptm.Split(':')[1] .Split('@')[0]; //The number after the @ is the position in the protein while (psimod.Length < 5) { psimod = "0" + psimod; //short part should be the accession number, which is an integer } mod = Sweet.lollipop.theoretical_database.uniprotModifications.Values.SelectMany(m => m) .Where(m => m.DatabaseReference != null && m.DatabaseReference.ContainsKey("PSI-MOD") && m.DatabaseReference["PSI-MOD"].Contains(psimod)).FirstOrDefault(); if (mod == null) { psimod = "MOD:" + psimod; mod = Sweet.lollipop.theoretical_database.uniprotModifications.Values .SelectMany(m => m).Where(m => m.DatabaseReference != null && m.DatabaseReference.ContainsKey("PSI-MOD") && m.DatabaseReference["PSI-MOD"].Contains(psimod)).FirstOrDefault(); } id = "PSI-MOD:" + psimod; } if (mod != null) { ptm_list.Add(new Ptm(position, mod)); } else { lock (bad_ptms) { bad_ptms.Add(id + " at " + cellStrings[4][position_after_begin - 1]); } add_topdown_hit = false; } }
/// <summary>
/// Creates a top-down hit from already-parsed result values and derives its
/// reported and theoretical masses.
/// </summary>
/// <param name="aaIsotopeMassList">Residue-to-mass table passed to <c>CalculateProteoformMass</c>.</param>
/// <param name="file">Input file this hit was read from.</param>
/// <param name="tdResultType">Result type assigned to this hit.</param>
/// <param name="accession">Accession stored on the hit.</param>
/// <param name="pfr">Value stored as <c>pfr_accession</c>.</param>
/// <param name="uniprot_id">UniProt identifier stored on the hit.</param>
/// <param name="name">Name stored on the hit.</param>
/// <param name="sequence">Sequence used for the theoretical mass and, when NeuCode labeled, the 'K' count.</param>
/// <param name="begin">Begin index stored on the hit.</param>
/// <param name="end">End index stored on the hit.</param>
/// <param name="modifications">PTMs stored as <c>ptm_list</c>; entries with a null modification add no mass.</param>
/// <param name="reported_mass">Reported mass; converted with <c>get_neucode_mass</c> when NeuCode labeling is on.</param>
/// <param name="theoretical_mass">Not read by this constructor; the stored theoretical mass is recomputed from the sequence and PTMs.</param>
/// <param name="scan">Stored as <c>ms2ScanNumber</c>.</param>
/// <param name="retention_time">Stored as <c>ms2_retention_time</c>.</param>
/// <param name="filename">File name stored on the hit.</param>
/// <param name="pscore">Stored as <c>pscore</c>.</param>
/// <param name="score">Stored as <c>score</c>.</param>
public TopDownHit(Dictionary<char, double> aaIsotopeMassList, InputFile file, TopDownResultType tdResultType,
    string accession, string pfr, string uniprot_id, string name, string sequence, int begin, int end,
    List<Ptm> modifications, double reported_mass, double theoretical_mass, int scan, double retention_time,
    string filename, double pscore, double score)
{
    // Identification and location values are copied through unchanged.
    this.pfr_accession = pfr;
    this.file = file;
    this.tdResultType = tdResultType;
    this.accession = accession;
    this.uniprot_id = uniprot_id;
    this.name = name;
    this.sequence = sequence;
    this.begin = begin;
    this.end = end;
    this.ptm_list = modifications;

    // With NeuCode labeling the reported mass is converted using the number of 'K' residues;
    // otherwise it is stored as given.
    if (Sweet.lollipop.neucode_labeled)
    {
        this.reported_mass = Sweet.lollipop.get_neucode_mass(
            reported_mass,
            sequence.Count(residue => residue == 'K'));
    }
    else
    {
        this.reported_mass = reported_mass;
    }

    // Theoretical mass = sequence mass + each attached modification's monoisotopic mass
    // (rounded to 5 decimals). PTMs without a modification are ignored.
    var sequenceMass = CalculateProteoformMass(sequence, aaIsotopeMassList);
    var roundedPtmMasses = ptm_list
        .Where(entry => entry.modification != null)
        .Select(entry => Math.Round((double)entry.modification.MonoisotopicMass, 5));
    this.theoretical_mass = sequenceMass + roundedPtmMasses.Sum();

    // Scan-level metadata and scores.
    this.ms2ScanNumber = scan;
    this.ms2_retention_time = retention_time;
    this.filename = filename;
    this.score = score;
    this.pscore = pscore;
}