/// <summary>
/// Expands every composition hypothesis entry into one entry per adduct count.
/// </summary>
/// <remarks>
/// The adduct count runs over the inclusive range read from <c>GD.Modification[2]</c>
/// (lower bound) to <c>GD.Modification[3]</c> (upper bound). When the lower bound is not
/// zero, an additional entry with zero adducts is emitted for every input entry.
/// Element columns required by the adduct but missing from the hypothesis are appended
/// (with amount 0) to every entry of <paramref name="CHy"/>, i.e. the input entries are modified.
/// </remarks>
/// <param name="CHy">Hypothesis entries; one of them carries the element and molecule names.</param>
/// <param name="GD">Generator settings holding the adduct definition and its bounds.</param>
/// <returns>
/// The expanded list. Only the first entry carries the element and molecule name lists.
/// </returns>
private List<CompositionHypothesisEntry> addAdducts(List<CompositionHypothesisEntry> CHy, GlycanHypothesisCombinatorialGenerator GD)
{
    // The header names are stored on a single entry only; take them from the first entry that has them.
    List<string> elementIDs = new List<string>();
    List<string> molname = new List<string>();
    foreach (CompositionHypothesisEntry entry in CHy)
    {
        if (entry.ElementNames.Count > 0)
        {
            elementIDs.AddRange(entry.ElementNames);
            molname.AddRange(entry.MoleculeNames);
            break;
        }
    }

    Double adductMas = CalculateAdductMass(GD);
    Int32 adductLB = 0;
    Int32 adductUB = 0;
    try
    {
        adductLB = Convert.ToInt32(GD.Modification[2]);
        adductUB = Convert.ToInt32(GD.Modification[3]);
    }
    catch (Exception ex)
    {
        MessageBox.Show("Lower bound and Upper bound in the Modification list must be integers. Error:" + ex);
        this.Close();
        // NOTE(review): execution continues after Close() with whatever bounds were parsed
        // (0 otherwise); this matches the previous behaviour.
    }

    AdductComposition adc = getAdductCompo(GD);

    // Make sure every element of the adduct has a column. The comparison must be an exact
    // match: a substring test would treat "N" as present when only "Na" exists, and the
    // IndexOf lookups below would then return -1.
    for (int i = 0; i < adc.elementIDs.Count(); i++)
    {
        string adductElement = adc.elementIDs[i];
        if (!elementIDs.Contains(adductElement))
        {
            elementIDs.Add(adductElement);
            foreach (CompositionHypothesisEntry CH in CHy)
            {
                CH.ElementAmount.Add(0);
            }
        }
    }

    List<CompositionHypothesisEntry> supFinalAns = new List<CompositionHypothesisEntry>();
    for (int i = 0; i < CHy.Count; i++)
    {
        CompositionHypothesisEntry source = CHy[i];

        if (adductLB != 0)
        {
            // The adduct range does not include zero, so add the adduct-free form explicitly.
            CompositionHypothesisEntry plain = new CompositionHypothesisEntry();
            // Copy instead of aliasing so the result does not share a list with the input entry.
            plain.ElementAmount = source.ElementAmount.ToList();
            plain.AdductNum = 0;
            plain.eqCount = source.eqCount;
            plain.MassWeight = source.MassWeight;
            supFinalAns.Add(plain);
        }

        for (int j = adductLB; j <= adductUB; j++)
        {
            CompositionHypothesisEntry temp = new CompositionHypothesisEntry();
            for (int k = 0; k < source.ElementAmount.Count(); k++)
            {
                temp.ElementAmount.Add(source.ElementAmount[k]);
            }
            for (int l = 0; l < adc.elementAmount.Count(); l++)
            {
                int column = elementIDs.IndexOf(adc.elementIDs[l]);
                temp.ElementAmount[column] = source.ElementAmount[column] + j * adc.elementAmount[l];
            }
            temp.AdductNum = j;
            temp.eqCount = source.eqCount;
            temp.MassWeight = source.MassWeight + j * adductMas;
            supFinalAns.Add(temp);
        }
    }

    // Only the first entry carries the header names; all others are left empty.
    foreach (CompositionHypothesisEntry entry in supFinalAns)
    {
        entry.ElementNames.Clear();
        entry.MoleculeNames.Clear();
    }
    if (supFinalAns.Count > 0)
    {
        supFinalAns[0].ElementNames = elementIDs;
        supFinalAns[0].MoleculeNames = molname;
    }
    return supFinalAns;
}
/// <summary>
/// Reads a composition hypothesis CSV file and returns its rows as hypothesis entries.
/// </summary>
/// <remarks>
/// Column layout, as driven by the header row: column 0 is the mass; the columns up to the
/// "Compositions" header are element amounts; the columns between "Compositions" and
/// "Adduct/Replacement" are molecule counts (an empty cell counts as 0); then follow the
/// adduct/replacement text and the adduct number. Rows with more columns additionally carry
/// peptide sequence, peptide modification, missed cleavages, number of glycosylations,
/// start and end position and, optionally, a protein ID.
/// Reading stops at the first row with fewer than two cells or an empty first cell.
/// Any error other than <see cref="OutOfMemoryException"/> is reported in a message box and
/// the entries parsed so far are returned.
/// </remarks>
/// <param name="currentPath">Path of the hypothesis file.</param>
/// <returns>The parsed entries; only the first one carries the element and molecule names.</returns>
public List<CompositionHypothesisEntry> getCompHypo(String currentPath)
{
    List<CompositionHypothesisEntry> compotable = new List<CompositionHypothesisEntry>();
    try
    {
        // 'using' guarantees the file handle is released even when a row fails to parse.
        using (FileStream reading = new FileStream(currentPath, FileMode.Open, FileAccess.Read))
        using (StreamReader readcompo = new StreamReader(reading))
        {
            // The header row determines where each column group starts.
            String head = readcompo.ReadLine();
            String[] headers = head.Split(',');
            List<string> molename = new List<string>();
            List<string> elementIDs = new List<string>();

            int compositionColumn = 1;
            while (headers[compositionColumn] != "Compositions")
            {
                elementIDs.Add(headers[compositionColumn]);
                compositionColumn++;
            }
            int adductColumn = compositionColumn + 1;
            while (headers[adductColumn] != "Adduct/Replacement")
            {
                molename.Add(headers[adductColumn]);
                adductColumn++;
            }

            bool firstrow = true;
            while (readcompo.Peek() >= 0)
            {
                String Line = readcompo.ReadLine();
                String[] eachentry = Line.Split(',');
                if (eachentry.Length < 2)
                    break;
                if (string.IsNullOrEmpty(eachentry[0]))
                    break;

                CompositionHypothesisEntry comhyp = new CompositionHypothesisEntry();
                comhyp.IsDecoy = true;
                if (firstrow)
                {
                    // Header names are kept on the first entry only.
                    comhyp.ElementNames = elementIDs;
                    comhyp.MoleculeNames = molename;
                    firstrow = false;
                }

                comhyp.MassWeight = Convert.ToDouble(eachentry[0]);
                for (int c = 1; c < compositionColumn; c++)
                {
                    comhyp.ElementAmount.Add(Convert.ToInt32(eachentry[c]));
                }
                comhyp.CompoundComposition = eachentry[compositionColumn];

                List<int> eqCoun = new List<int>();
                for (int c = compositionColumn + 1; c < adductColumn; c++)
                {
                    eqCoun.Add(String.IsNullOrEmpty(eachentry[c]) ? 0 : Convert.ToInt32(eachentry[c]));
                }
                comhyp.eqCounts = eqCoun;

                int i = adductColumn;
                comhyp.AddRep = eachentry[i];
                comhyp.AdductNum = Convert.ToInt32(eachentry[i + 1]);

                if (eachentry.Length > (i + 2))
                {
                    // Glycopeptide row: peptide columns follow the adduct number.
                    comhyp.PepSequence = eachentry[i + 2];
                    comhyp.PepModification = eachentry[i + 3];
                    comhyp.MissedCleavages = Convert.ToInt32(eachentry[i + 4]);
                    comhyp.NumGlycosylations = Convert.ToInt32(eachentry[i + 5]);
                    comhyp.StartAA = Convert.ToInt32(eachentry[i + 6]);
                    comhyp.EndAA = Convert.ToInt32(eachentry[i + 7]);
                    Console.WriteLine("{0}, {1}, {2}, {3}", eachentry.Count(), eachentry[1], i, i + 6);
                    if (eachentry.Length > i + 8)
                    {
                        comhyp.ProteinID = eachentry[i + 8];
                    }
                    else
                    {
                        comhyp.ProteinID = "?";
                    }
                }
                else
                {
                    // Glycan-only row: no peptide information available.
                    comhyp.PepSequence = "";
                    comhyp.PepModification = "";
                    comhyp.MissedCleavages = 0;
                    comhyp.NumGlycosylations = 0;
                    comhyp.StartAA = 0;
                    comhyp.EndAA = 0;
                    comhyp.ProteinID = "?";
                }
                compotable.Add(comhyp);
            }
        }
    }
    catch (OutOfMemoryException)
    {
        throw;
    }
    catch (Exception compoex)
    {
        MessageBox.Show("Error in loading GlycanCompositions Hypothesis File. Error:" + compoex);
    }
    return compotable;
}
/// <summary>
/// Reads a composition hypothesis in CSV form from an in-memory stream and returns its rows
/// as hypothesis entries.
/// </summary>
/// <remarks>
/// The column layout is taken from the header row: column 0 is the mass, the columns before
/// "Compositions" are element amounts, the columns between "Compositions" and
/// "Adduct/Replacement" are molecule counts (an empty cell counts as 0), followed by the
/// adduct/replacement text and the adduct number. Longer rows also carry the peptide columns
/// and, optionally, a protein ID. Reading stops at the first row with fewer than two cells
/// or an empty first cell. Unlike the file-based reader, parsing errors are not caught here.
/// The stream is closed when the method finishes.
/// </remarks>
/// <param name="reading">Stream positioned at the header row of the hypothesis.</param>
/// <returns>The parsed entries; only the first one carries the element and molecule names.</returns>
public List<CompositionHypothesisEntry> getCompHypoFromStream(MemoryStream reading)
{
    Console.WriteLine("---getCompHypoFromStream---");
    List<CompositionHypothesisEntry> compotable = new List<CompositionHypothesisEntry>();

    // Disposing the reader also closes the underlying stream, including on failure.
    using (StreamReader readcompo = new StreamReader(reading))
    {
        String head = readcompo.ReadLine();
        String[] headers = head.Split(',');
        List<string> molename = new List<string>();
        List<string> elementIDs = new List<string>();

        int compositionColumn = 1;
        while (headers[compositionColumn] != "Compositions")
        {
            elementIDs.Add(headers[compositionColumn]);
            compositionColumn++;
        }
        int adductColumn = compositionColumn + 1;
        while (headers[adductColumn] != "Adduct/Replacement")
        {
            molename.Add(headers[adductColumn]);
            adductColumn++;
        }

        bool firstrow = true;
        while (readcompo.Peek() >= 0)
        {
            String Line = readcompo.ReadLine();
            String[] eachentry = Line.Split(',');
            if (eachentry.Length < 2)
                break;
            if (string.IsNullOrEmpty(eachentry[0]))
                break;

            CompositionHypothesisEntry comhyp = new CompositionHypothesisEntry();
            comhyp.IsDecoy = true;
            if (firstrow)
            {
                // Header names are kept on the first entry only.
                comhyp.ElementNames = elementIDs;
                comhyp.MoleculeNames = molename;
                firstrow = false;
            }

            comhyp.MassWeight = Convert.ToDouble(eachentry[0]);
            for (int c = 1; c < compositionColumn; c++)
            {
                comhyp.ElementAmount.Add(Convert.ToInt32(eachentry[c]));
            }
            comhyp.CompoundComposition = eachentry[compositionColumn];

            List<int> eqCoun = new List<int>();
            for (int c = compositionColumn + 1; c < adductColumn; c++)
            {
                eqCoun.Add(String.IsNullOrEmpty(eachentry[c]) ? 0 : Convert.ToInt32(eachentry[c]));
            }
            comhyp.eqCounts = eqCoun;

            int i = adductColumn;
            comhyp.AddRep = eachentry[i];
            comhyp.AdductNum = Convert.ToInt32(eachentry[i + 1]);

            if (eachentry.Length > (i + 2))
            {
                comhyp.PepSequence = eachentry[i + 2];
                comhyp.PepModification = eachentry[i + 3];
                comhyp.MissedCleavages = Convert.ToInt32(eachentry[i + 4]);
                comhyp.NumGlycosylations = Convert.ToInt32(eachentry[i + 5]);
                comhyp.StartAA = Convert.ToInt32(eachentry[i + 6]);
                comhyp.EndAA = Convert.ToInt32(eachentry[i + 7]);
                // The protein ID column is optional; fall back to "?" as the file-based reader does
                // instead of indexing past the end of the row.
                if (eachentry.Length > i + 8)
                {
                    comhyp.ProteinID = eachentry[i + 8];
                }
                else
                {
                    comhyp.ProteinID = "?";
                }
            }
            else
            {
                comhyp.PepSequence = "";
                comhyp.PepModification = "";
                comhyp.MissedCleavages = 0;
                comhyp.NumGlycosylations = 0;
                comhyp.StartAA = 0;
                comhyp.EndAA = 0;
                comhyp.ProteinID = "?";
            }
            compotable.Add(comhyp);
        }
    }
    return compotable;
}
/// <summary>
/// Translates the formula of an additional rule into numbers: every upper-case letter is
/// replaced by the count stored for that letter in <c>one.eqCount</c>; all other characters
/// are copied unchanged.
/// </summary>
/// <remarks>
/// A letter without an entry in <c>one.eqCount</c> is reported in a message box and
/// contributes nothing to the result. The lookup uses <c>TryGetValue</c>, so only a missing
/// letter is treated as "invalid"; other failures are no longer hidden behind a catch-all.
/// </remarks>
/// <param name="one">Entry providing the per-letter counts.</param>
/// <param name="ar">Rule whose <c>Formula</c> is translated.</param>
/// <returns>The formula with letters replaced by their counts.</returns>
private String translet(CompositionHypothesisEntry one, Boundary ar)
{
    System.Text.StringBuilder translated = new System.Text.StringBuilder();
    foreach (char symbol in ar.Formula)
    {
        if (!char.IsUpper(symbol))
        {
            translated.Append(symbol);
            continue;
        }

        int count;
        if (one.eqCount.TryGetValue(symbol.ToString(), out count))
        {
            translated.Append(count);
        }
        else
        {
            MessageBox.Show("Invalid letter in the additional rules table.");
        }
    }
    return translated.ToString();
}
/// <summary>
/// Rewrites a constraint expression so that every upper-case letter is replaced by the
/// count stored for that letter in <c>CH.eqCount</c>, wrapped in parentheses. All other
/// characters are copied unchanged.
/// </summary>
/// <param name="CH">Entry providing the per-letter counts.</param>
/// <param name="Constraint">Constraint expression containing letters.</param>
/// <returns>The constraint with letters replaced by "(count)".</returns>
public String converConstraints(CompositionHypothesisEntry CH, String Constraint)
{
    System.Text.StringBuilder converted = new System.Text.StringBuilder();
    foreach (char symbol in Constraint)
    {
        if (!char.IsUpper(symbol))
        {
            converted.Append(Convert.ToString(symbol));
            continue;
        }
        // The indexer throws for a letter that has no count, exactly as before.
        converted.Append("(" + CH.eqCount[Convert.ToString(symbol)] + ")");
    }
    return converted.ToString();
}
/// <summary>
/// Builds all compositions obtainable by combining, for every molecule of the composition
/// table, each of that molecule's bound values.
/// </summary>
/// <remarks>
/// Each resulting entry records the chosen count per molecule letter in <c>eqCount</c>
/// (letters "A".."Q" start at 0), the summed element amounts and the summed mass.
/// Afterwards the list is sorted by descending mass and an entry is dropped when one of the
/// (up to three) entries that follow it has identical letter counts.
/// </remarks>
/// <param name="CoTa">Composition table, one row per molecule.</param>
/// <returns>
/// The de-duplicated compositions; only the first entry carries the element names.
/// An empty list when no composition could be generated.
/// </returns>
private List<CompositionHypothesisEntry> calComHypo(List<GlycanCompositionTable> CoTa)
{
    List<CompositionHypothesisEntry> Ans = new List<CompositionHypothesisEntry>();

    // Element names are taken from the first table row that has them.
    List<string> elementIDs = new List<string>();
    foreach (GlycanCompositionTable row in CoTa)
    {
        if (row.elementIDs.Count > 0)
        {
            elementIDs.AddRange(row.elementIDs);
            break;
        }
    }

    foreach (GlycanCompositionTable molecule in CoTa)
    {
        List<CompositionHypothesisEntry> tempAns = new List<CompositionHypothesisEntry>();
        foreach (String bound in molecule.Bound)
        {
            Int32 boundNumber = Convert.ToInt32(bound);
            // Mass contributed by this molecule at this count; it is the same for every partial composition.
            Double boundMass = getcompMass(molecule, elementIDs) * boundNumber;

            if (Ans.Count != 0)
            {
                // Append this molecule to every composition built so far.
                foreach (CompositionHypothesisEntry previous in Ans)
                {
                    CompositionHypothesisEntry combined = new CompositionHypothesisEntry();
                    foreach (var item in previous.eqCount)
                    {
                        combined.eqCount.Add(item.Key, item.Value);
                    }
                    combined.eqCount[molecule.Letter] = boundNumber;
                    for (int sh = 0; sh < previous.ElementAmount.Count(); sh++)
                    {
                        combined.ElementAmount.Add(boundNumber * molecule.elementAmount[sh] + previous.ElementAmount[sh]);
                    }
                    combined.MassWeight = boundMass + previous.MassWeight;
                    tempAns.Add(combined);
                }
            }
            else
            {
                // Nothing built yet: start a composition with all letters at zero.
                CompositionHypothesisEntry first = new CompositionHypothesisEntry();
                for (char letter = 'A'; letter <= 'Q'; letter++)
                {
                    first.eqCount.Add(letter.ToString(), 0);
                }
                first.eqCount[molecule.Letter] = boundNumber;
                for (int sh = 0; sh < molecule.elementAmount.Count(); sh++)
                {
                    first.ElementAmount.Add(boundNumber * molecule.elementAmount[sh]);
                }
                first.MassWeight = boundMass;
                tempAns.Add(first);
            }
        }
        Ans.Clear();
        Ans.AddRange(tempAns);
    }

    // Nothing was generated (no molecules or no bounds): there is no last row to keep.
    if (Ans.Count == 0)
    {
        return new List<CompositionHypothesisEntry>();
    }

    // Remove repeated rows: compare each row with the (at most) three rows after it.
    Ans = Ans.OrderByDescending(a => a.MassWeight).ToList();
    List<CompositionHypothesisEntry> Answer = new List<CompositionHypothesisEntry>();
    for (int i = 0; i < Ans.Count - 1; i++)
    {
        int endrow = Math.Min(i + 4, Ans.Count);
        bool repeated = false;
        for (int j = i + 1; j < endrow; j++)
        {
            if (Ans[i].eqCount.SequenceEqual(Ans[j].eqCount))
            {
                repeated = true;
                break;
            }
        }
        if (!repeated)
            Answer.Add(Ans[i]);
    }
    Answer.Add(Ans[Ans.Count - 1]);

    // Only the first entry carries the element names.
    foreach (CompositionHypothesisEntry entry in Answer)
    {
        entry.ElementNames.Clear();
    }
    Answer[0].ElementNames = elementIDs;
    return Answer;
}
/// <summary>
/// Builds a peptide-only hypothesis, used when no composition hypothesis file is loaded:
/// one entry per selected peptide, each with the compound composition "0", and shows the
/// resulting table in the grid.
/// </summary>
private void generateGPCompHypo()
{
    List<CompositionHypothesisEntry> hypothesis = new List<CompositionHypothesisEntry>();
    foreach (Peptide peptide in GetPeptidesFromTable())
    {
        if (!peptide.Selected)
        {
            continue;
        }

        hypothesis.Add(new CompositionHypothesisEntry
        {
            CompoundComposition = "",
            AdductNum = 0,
            AddRep = "",
            MassWeight = peptide.Mass,
            // Columns for glycopeptides.
            PepModification = peptide.Modifications,
            PepSequence = peptide.Sequence,
            MissedCleavages = peptide.MissedCleavages,
            StartAA = peptide.StartAA,
            EndAA = peptide.EndAA,
            NumGlycosylations = 0
        });
    }

    // No glycan is attached, so every row gets the same placeholder composition.
    foreach (CompositionHypothesisEntry entry in hypothesis)
    {
        entry.CompoundComposition = "0";
    }

    DataTable table = genDT(hypothesis);
    dataGridView2.DataSource = table;
    button6.Enabled = true;
}
/// <summary>
/// Combines the glycan compositions of a hypothesis file with the selected peptides into a
/// glycopeptide hypothesis and stores the resulting table in <c>theComhypoOnTab2</c>.
/// </summary>
/// <remarks>
/// For every selected peptide a peptide-only entry is created; then, once per allowed
/// glycosylation, every entry built so far is combined with every glycan composition.
/// Each combination adds the masses, counts and element amounts and removes one water
/// (two H, one O) for the bond that is formed. Finally the list is sorted by mass and
/// adjacent entries with the same peptide, modification, adduct number and molecule counts
/// are collapsed.
/// </remarks>
/// <param name="comhypopath">Path of the glycan composition hypothesis file.</param>
private void BuildGlycopeptideHypothesis1(string comhypopath)
{
    List<CompositionHypothesisEntry> CHy = getCompHypo(comhypopath);
    if (CHy.Count == 0)
    {
        // Previously this case failed with an index error on CHy[0].
        MessageBox.Show("The composition hypothesis file contains no entries. Job terminated.");
        return;
    }

    // Header names are stored on one entry only; copy them because they are cleared later.
    List<string> elementIDs = new List<string>();
    List<string> molename = new List<string>();
    foreach (CompositionHypothesisEntry entry in CHy)
    {
        if (entry.ElementNames.Count > 0)
        {
            elementIDs.AddRange(entry.ElementNames);
            molename.AddRange(entry.MoleculeNames);
            break;
        }
    }
    String AddRep = CHy[0].AddRep;

    int indexH = elementIDs.IndexOf("H");
    int indexO = elementIDs.IndexOf("O");
    // Water may be named "Water" or "G:Water"; one of them has to be present.
    if (molename.IndexOf("Water") < 0 && molename.IndexOf("G:Water") < 0)
    {
        MessageBox.Show("Your composition hypothesis contains a compound without Water. Job terminated.");
        return;
    }
    // NOTE(review): only a column named exactly "Water" has its count reduced below, as in the
    // previous implementation; "G:Water" passes the check above but is left untouched - confirm
    // whether its count should be reduced as well.
    int plainWaterIndex = molename.IndexOf("Water");

    List<Peptide> PP = GetPeptidesFromTable();
    List<CompositionHypothesisEntry> Ans = new List<CompositionHypothesisEntry>();
    PeriodicTable PT = new PeriodicTable();

    for (int i = 0; i < PP.Count; i++)
    {
        if (!PP[i].Selected)
        {
            continue;
        }

        Int32 Count = Convert.ToInt32(PP[i].NumGlycosylations);
        List<CompositionHypothesisEntry> Temp = new List<CompositionHypothesisEntry>();

        // First line: the bare peptide without any glycan.
        CompositionHypothesisEntry temp = new CompositionHypothesisEntry();
        temp.CompoundComposition = "";
        temp.AdductNum = 0;
        temp.AddRep = "";
        temp.MassWeight = PP[i].Mass;
        for (int s = 0; s < CHy[0].eqCounts.Count; s++)
        {
            temp.eqCounts.Add(0);
        }
        for (int s = 0; s < CHy[0].ElementAmount.Count; s++)
        {
            temp.ElementAmount.Add(0);
        }
        // Columns for glycopeptides.
        temp.PepModification = PP[i].Modifications;
        temp.PepSequence = PP[i].Sequence;
        temp.MissedCleavages = PP[i].MissedCleavages;
        temp.StartAA = PP[i].StartAA;
        temp.EndAA = PP[i].EndAA;
        temp.NumGlycosylations = 0;
        Temp.Add(temp);

        for (int j = 0; j < Count; j++)
        {
            List<CompositionHypothesisEntry> Temp2 = new List<CompositionHypothesisEntry>();
            for (int k = 0; k < Temp.Count; k++)
            {
                for (int l = 0; l < CHy.Count; l++)
                {
                    // Work on a deep copy of the glycan entry. The file used to be re-read from
                    // disk for every k just to obtain fresh objects.
                    CompositionHypothesisEntry temp2 = CHy[l].Clone();
                    temp2.NumGlycosylations = Temp[k].NumGlycosylations + 1;
                    temp2.PepModification = Temp[k].PepModification;
                    temp2.PepSequence = Temp[k].PepSequence;
                    temp2.MissedCleavages = Temp[k].MissedCleavages;
                    temp2.StartAA = Temp[k].StartAA;
                    temp2.EndAA = Temp[k].EndAA;

                    // Concatenate both composition strings in sorted order so the result does
                    // not depend on the order in which glycans were attached.
                    List<string> forsorting = new List<string>();
                    forsorting.Add(Temp[k].CompoundComposition);
                    forsorting.Add(temp2.CompoundComposition);
                    forsorting = forsorting.OrderBy(a => a).ToList();
                    temp2.CompoundComposition = forsorting[0] + forsorting[1];

                    temp2.AdductNum = temp2.AdductNum + Temp[k].AdductNum;
                    for (int s = 0; s < temp2.eqCounts.Count; s++)
                    {
                        temp2.eqCounts[s] = temp2.eqCounts[s] + Temp[k].eqCounts[s];
                    }
                    for (int s = 0; s < temp2.ElementAmount.Count; s++)
                    {
                        temp2.ElementAmount[s] = temp2.ElementAmount[s] + Temp[k].ElementAmount[s];
                    }

                    // One water is lost per attached glycan.
                    if (plainWaterIndex >= 0 && temp2.eqCounts[plainWaterIndex] > 0)
                    {
                        temp2.eqCounts[plainWaterIndex] = temp2.eqCounts[plainWaterIndex] - 1;
                    }

                    // The H and O columns are not present in database-generated hypotheses, so
                    // adjust them only when both exist. IndexOf returns -1 for "absent";
                    // index 0 is a valid column and must not be skipped.
                    if ((indexH >= 0) && (indexO >= 0))
                    {
                        temp2.ElementAmount[indexH] = temp2.ElementAmount[indexH] - 2;
                        temp2.ElementAmount[indexO] = temp2.ElementAmount[indexO] - 1;
                        if (temp2.ElementAmount[indexO] < 0)
                            temp2.ElementAmount[indexO] = 0;
                        if (temp2.ElementAmount[indexH] < 0)
                            temp2.ElementAmount[indexH] = 0;
                    }

                    // Hard coded removal of extra water from neutral charge glycan.
                    temp2.MassWeight = temp2.MassWeight + Temp[k].MassWeight - PT.getMass("H") * 2 - PT.getMass("O");
                    Temp2.Add(temp2);
                }
            }
            Temp.AddRange(Temp2);
        }
        Ans.AddRange(Temp);
    }

    if (Ans.Count == 0)
    {
        // No peptide was selected; previously this failed with an index error further down.
        MessageBox.Show("No peptides are selected. Job terminated.");
        return;
    }

    // Remove duplicates: after sorting by mass, drop an entry when its successor describes
    // the same glycopeptide.
    Ans = Ans.OrderBy(a => a.MassWeight).ToList();
    CHy.Clear();
    for (int i = 0; i < Ans.Count - 1; i++)
    {
        // Counts are compared position by position; a set comparison would treat
        // different compositions such as [1,2] and [2,1] as equal.
        bool sameCounts = Ans[i].eqCounts.SequenceEqual(Ans[i + 1].eqCounts);
        bool thesame = sameCounts
            && Ans[i].PepSequence == Ans[i + 1].PepSequence
            && Ans[i].AdductNum == Ans[i + 1].AdductNum
            && Ans[i].PepModification == Ans[i + 1].PepModification;
        if (!thesame)
            CHy.Add(Ans[i]);
    }
    Console.WriteLine("Ans Length {0}", Ans.Count());
    CHy.Add(Ans[Ans.Count - 1]);

    // Only the first row carries the element and molecule names.
    foreach (CompositionHypothesisEntry entry in CHy)
    {
        entry.ElementNames.Clear();
        entry.MoleculeNames.Clear();
    }
    CHy[0].ElementNames = elementIDs;
    CHy[0].MoleculeNames = molename;

    // Obtain the name of the adduct molecules.
    GlycanHypothesisCombinatorialGenerator GD = new GlycanHypothesisCombinatorialGenerator();
    GD.Modification = AddRep.Split('/');

    // Send to generate DataTable.
    Console.WriteLine(CHy[0]);
    theComhypoOnTab2 = genDT(CHy, GD);
}
/// <summary>
/// Creates a copy of this entry whose lists and letter-count dictionary are new
/// collections filled with the same values, so changing the copy does not affect this entry.
/// </summary>
/// <returns>The independent copy.</returns>
public CompositionHypothesisEntry Clone()
{
    // Plain values first.
    CompositionHypothesisEntry dup = new CompositionHypothesisEntry
    {
        CompoundComposition = this.CompoundComposition,
        AdductNum = this.AdductNum,
        AddRep = this.AddRep,
        PepModification = this.PepModification,
        PepSequence = this.PepSequence,
        MissedCleavages = this.MissedCleavages,
        NumGlycosylations = this.NumGlycosylations,
        StartAA = this.StartAA,
        EndAA = this.EndAA,
        ProteinID = this.ProteinID,
        MassWeight = this.MassWeight,
        IsDecoy = this.IsDecoy
    };

    // Collections are copied element by element into the new entry's own collections.
    foreach (string id in this.ElementNames)
    {
        dup.ElementNames.Add(id);
    }
    foreach (int amount in this.ElementAmount)
    {
        dup.ElementAmount.Add(amount);
    }
    foreach (string molName in this.MoleculeNames)
    {
        dup.MoleculeNames.Add(molName);
    }
    foreach (int eq in this.eqCounts)
    {
        dup.eqCounts.Add(eq);
    }
    foreach (KeyValuePair<String, Int32> kvp in this.eqCount)
    {
        dup.eqCount.Add(kvp.Key, kvp.Value);
    }

    return dup;
}
/// <summary>
/// Extends a hypothesis with glycopeptide entries: for every selected peptide a
/// peptide-only entry is added and, once per allowed glycosylation, every entry built so
/// far is combined with every glycan composition of <paramref name="CH"/>.
/// </summary>
/// <param name="CHy">Existing entries; they are placed first in the result.</param>
/// <param name="CH">Glycan compositions to attach. They are not modified.</param>
/// <param name="PP">Peptides; only those marked as selected are used.</param>
/// <returns>The existing entries followed by the peptide and glycopeptide entries.</returns>
private List<CompositionHypothesisEntry> getPPhypo(List<CompositionHypothesisEntry> CHy, List<CompositionHypothesisEntry> CH, List<Peptide> PP)
{
    List<CompositionHypothesisEntry> Ans = new List<CompositionHypothesisEntry>();
    Ans.AddRange(CHy);

    for (int i = 0; i < PP.Count; i++)
    {
        if (!PP[i].Selected)
        {
            continue;
        }

        Int32 Count = Convert.ToInt32(PP[i].NumGlycosylations);
        List<CompositionHypothesisEntry> Temp = new List<CompositionHypothesisEntry>();

        // First line: the bare peptide, with every molecule letter at zero.
        CompositionHypothesisEntry temp = new CompositionHypothesisEntry();
        temp.CompoundComposition = "";
        temp.AdductNum = 0;
        temp.AddRep = "";
        for (char letter = 'A'; letter <= 'Q'; letter++)
        {
            temp.eqCount.Add(letter.ToString(), 0);
        }
        temp.MassWeight = PP[i].Mass;
        // Columns for glycopeptides.
        temp.PepModification = PP[i].Modifications;
        temp.PepSequence = PP[i].Sequence;
        temp.MissedCleavages = PP[i].MissedCleavages;
        temp.NumGlycosylations = 0;
        Temp.Add(temp);

        for (int j = 0; j < Count; j++)
        {
            List<CompositionHypothesisEntry> Temp2 = new List<CompositionHypothesisEntry>();
            for (int k = 0; k < Temp.Count; k++)
            {
                for (int l = 0; l < CH.Count; l++)
                {
                    // Clone the glycan entry. Assigning CH[l] directly made every result row
                    // the same object, so the mass accumulated across iterations and earlier
                    // rows were overwritten by later ones.
                    CompositionHypothesisEntry temp2 = CH[l].Clone();
                    temp2.MassWeight = temp2.MassWeight + Temp[k].MassWeight;
                    // NOTE(review): this stores the peptide's maximum glycosylation count on every
                    // combination, not the number of glycans attached so far - confirm intent.
                    temp2.NumGlycosylations = Count;
                    temp2.PepModification = Temp[k].PepModification;
                    temp2.PepSequence = Temp[k].PepSequence;
                    temp2.MissedCleavages = Temp[k].MissedCleavages;
                    Temp2.Add(temp2);
                }
            }
            Temp.AddRange(Temp2);
        }
        Ans.AddRange(Temp);
    }
    return Ans;
}
/// <summary>
/// Creates a new, matched results group from an existing group and a hypothesis entry, so
/// the matching step does not modify the original group object.
/// </summary>
/// <remarks>
/// The listed members are copied by assignment; list members and the decon row therefore
/// refer to the same objects as in <paramref name="input1"/>.
/// </remarks>
/// <param name="input1">Group whose values are copied.</param>
/// <param name="comhypo">Hypothesis entry stored as the predicted composition.</param>
/// <returns>The new group with <c>Match</c> set to true.</returns>
private ResultsGroup matchPassbyValue(ResultsGroup input1, CompositionHypothesisEntry comhypo)
{
    ResultsGroup copy = new ResultsGroup
    {
        DeconRow = input1.DeconRow,
        MostAbundant = input1.MostAbundant,
        NumChargeStates = input1.NumChargeStates,
        ScanDensity = input1.ScanDensity,
        NumModiStates = input1.NumModiStates,
        TotalVolume = input1.TotalVolume,
        ExpectedA = input1.ExpectedA,
        CentroidScan = input1.CentroidScan,
        NumOfScan = input1.NumOfScan,
        AvgSigNoise = input1.AvgSigNoise,
        MaxScanNum = input1.MaxScanNum,
        MinScanNum = input1.MinScanNum,
        ScanNumList = input1.ScanNumList,
        ChargeStateList = input1.ChargeStateList,
        AvgSigNoiseList = input1.AvgSigNoiseList,
        CentroidScanLR = input1.CentroidScanLR,
        AvgAA2List = input1.AvgAA2List,
        PredictedComposition = comhypo,
        Match = true
    };
    return copy;
}
/// <summary>
/// Groups the deconvoluted peaks of a file, merges adduct states, matches the groups
/// against the composition hypothesis and computes the per-group features.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Rows are sorted by descending abundance and binned: a row joins the first existing
///    group whose seed mass is within the grouping tolerance (ppm), otherwise it starts a
///    new group. Each group's mass is then replaced by its abundance-weighted mean mass.
/// 2. When <paramref name="Mas"/> is not zero, groups whose mass differs by a multiple of
///    the adduct mass (within the adduct tolerance) are merged into the lighter group.
/// 3. Groups with fewer scans than the configured minimum are removed.
/// 4. Remaining groups are matched to hypothesis entries by mass within the matching tolerance.
/// 5. Two linear regressions are fitted and the remaining features are filled in.
/// The element and molecule name lists of the entries in <paramref name="comhyp"/> that
/// end up in the result are cleared; the names are attached to the first result only.
/// </remarks>
/// <param name="filename">Deconvolution data file to read.</param>
/// <param name="modelParameters">Tolerances (ppm) and the minimum scan number.</param>
/// <param name="Mas">Adduct mass; 0 disables the adduct merging step.</param>
/// <param name="comhyp">Composition hypothesis to match against.</param>
/// <returns>The grouped results; an empty list when the file contains no rows.</returns>
private List<ResultsGroup> Groupings(String filename, ParametersForm.ParameterSettings modelParameters, Double Mas, List<CompositionHypothesisEntry> comhyp)
{
    GetDeconData DeconDATA1 = new GetDeconData();

    // Keep a copy of the header names; the originals are cleared at the end.
    List<string> elementIDs = new List<string>();
    List<string> molename = new List<string>();
    foreach (CompositionHypothesisEntry entry in comhyp)
    {
        if (entry.ElementNames.Count > 0)
        {
            elementIDs.AddRange(entry.ElementNames);
            molename.AddRange(entry.MoleculeNames);
            break;
        }
    }

    List<DeconRow> sortedDeconData = DeconDATA1.getdata(filename);
    if (sortedDeconData == null || sortedDeconData.Count == 0)
    {
        // Nothing to group; previously this failed when seeding the first group.
        return new List<ResultsGroup>();
    }

    // First, sort the list descendingly by its abundance.
    sortedDeconData = sortedDeconData.OrderByDescending(a => a.abundance).ToList();

    //################ First grouping: bin rows by mass. ###############
    List<ResultsGroup> fgResults = new List<ResultsGroup>();
    fgResults.Add(createGroupFromRow(sortedDeconData[0]));
    for (int j = 1; j < sortedDeconData.Count; j++)
    {
        DeconRow row = sortedDeconData[j];
        bool merged = false;
        for (int i = 0; i < fgResults.Count; i++)
        {
            // Grouping error is given in ppm, so it needs to be multiplied by 0.000001.
            Double GroupingError = fgResults[i].DeconRow.MonoisotopicMassWeight * modelParameters.GroupingErrorEG * 0.000001;
            if ((row.MonoisotopicMassWeight < (fgResults[i].DeconRow.MonoisotopicMassWeight + GroupingError)) && (row.MonoisotopicMassWeight > (fgResults[i].DeconRow.MonoisotopicMassWeight - GroupingError)))
            {
                if (fgResults[i].MaxScanNum < row.ScanNum)
                {
                    fgResults[i].MaxScanNum = row.ScanNum;
                }
                else if (fgResults[i].MinScanNum > row.ScanNum)
                {
                    fgResults[i].MinScanNum = row.ScanNum;
                }
                fgResults[i].NumOfScan = fgResults[i].NumOfScan + 1;
                fgResults[i].ScanNumList.Add(row.ScanNum);
                fgResults[i].TotalVolume = fgResults[i].TotalVolume + row.abundance * row.fwhm;
                fgResults[i].ChargeStateList.Add(row.charge);
                fgResults[i].AvgSigNoiseList.Add(row.SignalNoiseRatio);
                fgResults[i].AvgAA2List.Add(row.MonoisotopicAbundance / (row.MonoisotopicPlus2Abundance + 1));
                fgResults[i].ListAbundance.Add(row.abundance);
                fgResults[i].ListMonoMassWeight.Add(row.MonoisotopicMassWeight);
                merged = true;
                break;
            }
        }
        if (!merged)
        {
            // The new group is added after the search has finished. Adding it inside the
            // search loop let the loop continue onto the new group, so the row was merged
            // into its own group a second time (or, with a zero tolerance, forever).
            fgResults.Add(createGroupFromRow(row));
        }
    }

    // Replace each group's mass by the abundance-weighted mean of its members.
    for (int y = 0; y < fgResults.Count; y++)
    {
        Double sumofTopPart = 0;
        for (int z = 0; z < fgResults[y].ListMonoMassWeight.Count(); z++)
        {
            sumofTopPart = sumofTopPart + fgResults[y].ListMonoMassWeight[z] * fgResults[y].ListAbundance[z];
        }
        fgResults[y].DeconRow.MonoisotopicMassWeight = sumofTopPart / fgResults[y].ListAbundance.Sum();
    }

    //######################## Second grouping: merge adduct states. ################################
    fgResults = fgResults.OrderBy(o => o.DeconRow.MonoisotopicMassWeight).ToList();
    if (Mas != 0)
    {
        for (int i = 0; i < fgResults.Count - 1; i++)
        {
            if (fgResults[i].MostAbundant == true)
            {
                int numModStates = 1;
                for (int j = i + 1; j < fgResults.Count; j++)
                {
                    Double AdductTolerance = fgResults[i].DeconRow.MonoisotopicMassWeight * modelParameters.AdductToleranceEA * 0.000001;
                    if ((fgResults[i].DeconRow.MonoisotopicMassWeight >= (fgResults[j].DeconRow.MonoisotopicMassWeight - Mas * numModStates - AdductTolerance)) && (fgResults[i].DeconRow.MonoisotopicMassWeight <= (fgResults[j].DeconRow.MonoisotopicMassWeight - Mas * numModStates + AdductTolerance)))
                    {
                        // Widen the scan range to cover the merged group.
                        if (fgResults[i].MaxScanNum < fgResults[j].MaxScanNum)
                        {
                            fgResults[i].MaxScanNum = fgResults[j].MaxScanNum;
                        }
                        if (fgResults[i].MinScanNum > fgResults[j].MinScanNum)
                        {
                            fgResults[i].MinScanNum = fgResults[j].MinScanNum;
                        }

                        fgResults[i].NumOfScan = fgResults[i].NumOfScan + fgResults[j].NumOfScan;
                        fgResults[i].ScanNumList.AddRange(fgResults[j].ScanNumList);
                        for (int h = 0; h < fgResults[j].ChargeStateList.Count; h++)
                        {
                            fgResults[i].ChargeStateList.Add(fgResults[j].ChargeStateList[h]);
                        }
                        for (int h = 0; h < fgResults[j].AvgSigNoiseList.Count; h++)
                        {
                            fgResults[i].AvgSigNoiseList.Add(fgResults[j].AvgSigNoiseList[h]);
                        }
                        for (int h = 0; h < fgResults[j].AvgAA2List.Count; h++)
                        {
                            fgResults[i].AvgAA2List.Add(fgResults[j].AvgAA2List[h]);
                        }

                        numModStates++;
                        fgResults[i].NumModiStates = fgResults[i].NumModiStates + 1;
                        fgResults[j].MostAbundant = false;
                        fgResults[i].TotalVolume = fgResults[i].TotalVolume + fgResults[j].TotalVolume;

                        // The more abundant member becomes the representative row of the
                        // merged group; the adduct counter restarts from it.
                        if (fgResults[i].DeconRow.abundance < fgResults[j].DeconRow.abundance)
                        {
                            fgResults[i].DeconRow = fgResults[j].DeconRow;
                            numModStates = 1;
                        }
                    }
                    else if (fgResults[i].DeconRow.MonoisotopicMassWeight < (fgResults[j].DeconRow.MonoisotopicMassWeight - (Mas + AdductTolerance * 2) * numModStates))
                    {
                        // Save running time. Since the list is sorted, any other mass below won't match as an adduct.
                        break;
                    }
                }
            }
        }
    }
    else
    {
        for (int i = 0; i < fgResults.Count; i++)
        {
            fgResults[i].NumModiStates = 0;
        }
    }

    // Implement the scan number threshold: drop everything from the first group that has too few scans.
    fgResults = fgResults.OrderByDescending(a => a.NumOfScan).ToList();
    int scanCutOff = fgResults.FindIndex(g => g.NumOfScan < modelParameters.MinScanNumber);
    if (scanCutOff >= 0)
    {
        fgResults.RemoveRange(scanCutOff, fgResults.Count - scanCutOff);
    }

    //############# Matching: match the composition hypothesis with the grouped decon data. ############
    List<ResultsGroup> sgResults = new List<ResultsGroup>();
    // Number of matches decides which rows feed the regressions below.
    Int32 numOfMatches = 0;
    fgResults = fgResults.OrderByDescending(a => a.DeconRow.MonoisotopicMassWeight).ToList();
    comhyp = comhyp.OrderByDescending(b => b.MassWeight).ToList();
    bool hasMatch = false;
    int lastMatch = 0;
    for (int j = 0; j < fgResults.Count; j++)
    {
        if (fgResults[j].MostAbundant == true)
        {
            // Both lists are sorted by descending mass, so the search resumes slightly before the last position.
            lastMatch = lastMatch - 4;
            if (lastMatch < 0)
                lastMatch = 0;
            // NOTE(review): a group that reaches the end of the hypothesis list without matching
            // and without hitting the "heavier than hypothesis" branch below is not added to the
            // results at all - unchanged from the previous behaviour; confirm this is intended.
            for (int i = lastMatch; i < comhyp.Count; i++)
            {
                Double MatchingError = comhyp[i].MassWeight * modelParameters.MatchErrorEM * 0.000001;
                if ((fgResults[j].DeconRow.MonoisotopicMassWeight <= (comhyp[i].MassWeight + MatchingError)) && (fgResults[j].DeconRow.MonoisotopicMassWeight >= (comhyp[i].MassWeight - MatchingError)))
                {
                    sgResults.Add(matchPassbyValue(fgResults[j], comhyp[i]));
                    numOfMatches++;
                    lastMatch = i + 1;
                    hasMatch = true;
                    continue;
                }
                // Since the data is sorted, there are no more matches below that row, break it.
                if (fgResults[j].DeconRow.MonoisotopicMassWeight > (comhyp[i].MassWeight + MatchingError))
                {
                    if (hasMatch == false)
                    {
                        // No hypothesis matched: keep the group as an unmatched result.
                        ResultsGroup GR = fgResults[j];
                        GR.Match = false;
                        GR.PredictedComposition = new CompositionHypothesisEntry();
                        sgResults.Add(GR);
                        lastMatch = i;
                        break;
                    }
                    else
                    {
                        hasMatch = false;
                        break;
                    }
                }
            }
        }
    }

    //############## Features: Expected A and Centroid Scan Error need linear regression. ###################
    // In these models the output is the Y axis and the input is X.
    SimpleLinearRegression AA2regression = new SimpleLinearRegression();
    List<double> aainput = new List<double>();
    List<double> aaoutput = new List<double>();
    // Centroid Scan Error.
    List<double> ccinput = new List<double>();
    List<double> ccoutput = new List<double>();
    if (numOfMatches > 3)
    {
        // Enough matches: fit on matched groups only.
        for (int i = 0; i < sgResults.Count; i++)
        {
            if (sgResults[i].Match == true)
            {
                if (sgResults[i].AvgAA2List.Average() != 0)
                {
                    aainput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                    aaoutput.Add(sgResults[i].AvgAA2List.Average());
                }
                if (sgResults[i].DeconRow.abundance > 250)
                {
                    ccoutput.Add(sgResults[i].DeconRow.ScanNum);
                    ccinput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                }
            }
        }
    }
    else
    {
        // Too few matches: fit on all groups.
        for (int i = 0; i < sgResults.Count; i++)
        {
            if (sgResults[i].AvgAA2List.Average() != 0)
            {
                aainput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
                aaoutput.Add(sgResults[i].AvgAA2List.Average());
            }
            if (sgResults[i].DeconRow.abundance > 250)
            {
                ccoutput.Add(sgResults[i].ScanNumList.Average());
                ccinput.Add(sgResults[i].DeconRow.MonoisotopicMassWeight);
            }
        }
    }
    SimpleLinearRegression CSEregression = new SimpleLinearRegression();
    CSEregression.Regress(ccinput.ToArray(), ccoutput.ToArray());
    AA2regression.Regress(aainput.ToArray(), aaoutput.ToArray());

    // The remaining features are computed and stored in the grouping results.
    for (int i = 0; i < sgResults.Count; i++)
    {
        // Scan density is: number of scans divided by (max scan number - min scan number + 15),
        // or 0 when the group covers a single scan number.
        Double ScanDensity;
        Int32 MaxScanNumber = sgResults[i].MaxScanNum;
        Int32 MinScanNumber = sgResults[i].MinScanNum;
        Double NumOfScan = sgResults[i].NumOfScan;
        List<Int32> numChargeStatesList = sgResults[i].ChargeStateList.Distinct().ToList();
        Int32 numChargeStates = numChargeStatesList.Count;
        Double numModiStates = sgResults[i].NumModiStates;
        if ((MaxScanNumber - MinScanNumber) != 0)
            ScanDensity = NumOfScan / (MaxScanNumber - MinScanNumber + 15);
        else
            ScanDensity = 0;

        sgResults[i].NumChargeStates = numChargeStates;
        sgResults[i].ScanDensity = ScanDensity;
        sgResults[i].NumModiStates = numModiStates;
        sgResults[i].CentroidScanLR = CSEregression.Compute(sgResults[i].DeconRow.MonoisotopicMassWeight);
        sgResults[i].CentroidScan = Math.Abs(sgResults[i].ScanNumList.Average() - sgResults[i].CentroidScanLR);
        sgResults[i].ExpectedA = Math.Abs(sgResults[i].AvgAA2List.Average() - AA2regression.Compute(sgResults[i].DeconRow.MonoisotopicMassWeight));
        sgResults[i].AvgSigNoise = sgResults[i].AvgSigNoiseList.Average();
    }

    // Only the first result carries the header names. All lists are cleared first because
    // several results may refer to the same hypothesis entry.
    foreach (ResultsGroup result in sgResults)
    {
        result.PredictedComposition.ElementNames.Clear();
        result.PredictedComposition.MoleculeNames.Clear();
    }
    if (sgResults.Count > 0)
    {
        sgResults[0].PredictedComposition.ElementNames = elementIDs;
        sgResults[0].PredictedComposition.MoleculeNames = molename;
    }
    return sgResults;
}

// Creates a new group that contains exactly one deconvoluted row (one scan, one modification state).
private static ResultsGroup createGroupFromRow(DeconRow seed)
{
    ResultsGroup bin = new ResultsGroup();
    bin.DeconRow = seed;
    bin.MostAbundant = true;
    bin.NumOfScan = 1;
    bin.MinScanNum = seed.ScanNum;
    bin.MaxScanNum = seed.ScanNum;
    bin.ChargeStateList = new List<int>();
    bin.ChargeStateList.Add(seed.charge);
    bin.AvgSigNoiseList = new List<Double>();
    bin.AvgSigNoiseList.Add(seed.SignalNoiseRatio);
    bin.AvgAA2List = new List<double>();
    bin.AvgAA2List.Add(seed.MonoisotopicAbundance / (seed.MonoisotopicPlus2Abundance + 1));
    bin.ScanNumList = new List<Int32>();
    bin.ScanNumList.Add(seed.ScanNum);
    bin.NumModiStates = 1;
    bin.TotalVolume = seed.abundance * seed.fwhm;
    bin.ListAbundance = new List<double>();
    bin.ListAbundance.Add(seed.abundance);
    bin.ListMonoMassWeight = new List<double>();
    bin.ListMonoMassWeight.Add(seed.MonoisotopicMassWeight);
    return bin;
}
// Default constructor: gives the new group a newly constructed CompositionHypothesisEntry
// so that PredictedComposition is never left unassigned.
public ResultsGroup()
{
    this.PredictedComposition = new CompositionHypothesisEntry();
}
// Reads a previously saved results file and rebuilds one ResultsGroup per data row.
//
// The layout is picked from the file extension and the header row:
//   * extension "csv", headers[5] != "Hypothesis MW":
//       element columns start at column 17 and run up to the "Hypothesis MW" header; the molecule
//       columns follow and end at the "Adduct/Replacement" header.
//   * extension "csv", headers[3] == "PeptideSequence":
//       six fixed element columns (18-23), molecule columns start at column 24.
//   * extension "csv", anything else:
//       six fixed element columns (17-22), seventeen fixed count columns (23-39) keyed "A".."Q".
//   * any other extension ("gly1" data): tab-separated, only the score/statistics columns are read.
//
// path:    file to read. It is opened read-only.
// returns: one ResultsGroup for every data row whose first column is not empty; rows with an empty
//          first column are skipped. An empty file yields an empty list.
// throws:  whatever FileStream throws when the file cannot be opened; Convert.ToDouble/ToInt32 throw
//          when a cell is not numeric; an out-of-range index is raised when the header or a row has
//          fewer columns than the selected layout expects.
public List<ResultsGroup> ReadResultsFromFile(String path)
{
    List<ResultsGroup> Ans = new List<ResultsGroup>();
    List<String> molnames = new List<String>();

    // The using statements close the stream and the reader on every exit path,
    // including the case where a row fails to parse.
    using (FileStream FS = new FileStream(path, FileMode.Open, FileAccess.Read))
    using (StreamReader read = new StreamReader(FS))
    {
        String ext = Path.GetExtension(path).Replace(".", "");

        String header = read.ReadLine();
        if (header == null)
        {
            // No header row at all: nothing to read.
            return Ans;
        }

        if (ext == "csv")
        {
            String[] headers = header.Split(',');
            if (headers[5] != "Hypothesis MW")
            {
                ReadVariableElementCsvRows(read, headers, molnames, Ans);
            }
            //older data format.
            else if (headers[3] == "PeptideSequence")
            {
                ReadPeptideSequenceCsvRows(read, headers, molnames, Ans);
            }
            //This is supporting an older format of data. Today is Sept 2013, can be deleted after 1 year.
            else
            {
                ReadFixedCountCsvRows(read, headers, molnames, Ans);
            }
        }
        //This is gly1 data.
        else
        {
            ReadTabDelimitedRows(read, molnames, Ans);
        }
    }

    return Ans;
}

// Reads the csv layout whose element columns are listed in the header between column 17 and "Hypothesis MW".
private static void ReadVariableElementCsvRows(StreamReader read, String[] headers, List<String> molnames, List<ResultsGroup> results)
{
    List<string> elementIDs = new List<string>();
    int column = 17;
    while (headers[column] != "Hypothesis MW")
    {
        elementIDs.Add(headers[column]);
        column++;
    }
    // column now points at "Hypothesis MW"; the molecule names start right after it.
    CollectMoleculeNames(headers, column + 1, molnames);

    bool firstRow = true;
    String line;
    while ((line = read.ReadLine()) != null)
    {
        String[] fields = line.Split(',');
        if (String.IsNullOrEmpty(fields[0]))
        {
            continue;
        }

        ResultsGroup gR = CreateResultsGroup();
        if (firstRow)
        {
            // Only the first stored row carries the element and molecule name lists.
            gR.PredictedComposition.ElementNames = elementIDs;
            gR.PredictedComposition.MoleculeNames = molnames;
            firstRow = false;
        }

        ReadScoreAndMass(gR, fields);
        gR.PredictedComposition.CompoundComposition = fields[2];
        gR.Match = IsMatchedComposition(fields[2]);
        gR.PredictedComposition.PepSequence = fields[3];
        ReadGroupStatistics(gR, fields, 5);
        gR.DeconRow.ScanNum = Convert.ToInt32(fields[14]);
        gR.MaxScanNum = Convert.ToInt32(fields[15]);
        gR.MinScanNum = Convert.ToInt32(fields[16]);
        gR.PredictedComposition.eqCount = new Dictionary<string, int>();

        int sh = 17;
        for (int ele = 0; ele < elementIDs.Count; ele++)
        {
            gR.PredictedComposition.ElementAmount.Add(Convert.ToInt32(fields[sh]));
            sh++;
        }
        gR.PredictedComposition.MassWeight = Convert.ToDouble(fields[sh]);
        sh++;

        gR.PredictedComposition.eqCounts = ReadMoleculeCounts(fields, sh, molnames.Count);

        int tail = sh + molnames.Count;
        gR.PredictedComposition.AddRep = fields[tail];
        gR.PredictedComposition.AdductNum = Convert.ToInt32(fields[tail + 1]);
        gR.PredictedComposition.PepModification = fields[tail + 2];
        gR.PredictedComposition.MissedCleavages = Convert.ToInt32(fields[tail + 3]);
        gR.PredictedComposition.NumGlycosylations = Convert.ToInt32(fields[tail + 4]);
        gR.PredictedComposition.StartAA = Convert.ToInt32(fields[tail + 5]);
        gR.PredictedComposition.EndAA = Convert.ToInt32(fields[tail + 6]);
        // The protein id column is optional; "?" marks rows that do not have it.
        if (fields.Length > tail + 7)
        {
            gR.PredictedComposition.ProteinID = fields[tail + 7];
        }
        else
        {
            gR.PredictedComposition.ProteinID = "?";
        }

        results.Add(gR);
    }
}

// Reads the csv layout identified by headers[3] == "PeptideSequence" (molecule columns start at column 24).
private static void ReadPeptideSequenceCsvRows(StreamReader read, String[] headers, List<String> molnames, List<ResultsGroup> results)
{
    CollectMoleculeNames(headers, 24, molnames);

    bool firstRow = true;
    String line;
    while ((line = read.ReadLine()) != null)
    {
        String[] fields = line.Split(',');
        if (String.IsNullOrEmpty(fields[0]))
        {
            continue;
        }

        ResultsGroup gR = CreateResultsGroup();
        if (firstRow)
        {
            // Attach the names to the first row that is actually stored, so a skipped
            // leading row cannot swallow them.
            gR.PredictedComposition.ElementNames.AddRange(new List<string> { "C", "H", "N", "O", "S", "P" });
            gR.PredictedComposition.MoleculeNames = molnames;
            firstRow = false;
        }

        ReadScoreAndMass(gR, fields);
        gR.PredictedComposition.CompoundComposition = fields[2];
        gR.Match = IsMatchedComposition(fields[2]);
        gR.PredictedComposition.PepSequence = fields[3];
        gR.PredictedComposition.MassWeight = Convert.ToDouble(fields[5]);
        ReadGroupStatistics(gR, fields, 6);
        gR.DeconRow.ScanNum = Convert.ToInt32(fields[15]);
        gR.MaxScanNum = Convert.ToInt32(fields[16]);
        gR.MinScanNum = Convert.ToInt32(fields[17]);
        gR.PredictedComposition.eqCount = new Dictionary<string, int>();
        for (int k = 18; k < 24; k++)
        {
            gR.PredictedComposition.ElementAmount.Add(Convert.ToInt32(fields[k]));
        }

        gR.PredictedComposition.eqCounts = ReadMoleculeCounts(fields, 24, molnames.Count);

        int tail = 24 + molnames.Count;
        gR.PredictedComposition.AddRep = fields[tail];
        gR.PredictedComposition.AdductNum = Convert.ToInt32(fields[tail + 1]);
        gR.PredictedComposition.PepModification = fields[tail + 2];
        gR.PredictedComposition.MissedCleavages = Convert.ToInt32(fields[tail + 3]);
        gR.PredictedComposition.NumGlycosylations = Convert.ToInt32(fields[tail + 4]);

        results.Add(gR);
    }
}

// Reads the remaining csv layout: fixed element columns 17-22 and fixed count columns 23-39 keyed "A".."Q".
private static void ReadFixedCountCsvRows(StreamReader read, String[] headers, List<String> molnames, List<ResultsGroup> results)
{
    const string countKeys = "ABCDEFGHIJKLMNOPQ";

    CollectMoleculeNames(headers, 23, molnames);

    bool firstRow = true;
    String line;
    while ((line = read.ReadLine()) != null)
    {
        String[] fields = line.Split(',');
        if (String.IsNullOrEmpty(fields[0]))
        {
            continue;
        }

        // The names must be set AFTER the group's PredictedComposition has been assigned;
        // setting them on an entry that is replaced afterwards would silently discard them.
        ResultsGroup gR = CreateResultsGroup();
        if (firstRow)
        {
            gR.PredictedComposition.ElementNames.AddRange(new List<string> { "C", "H", "N", "O", "S", "P" });
            gR.PredictedComposition.MoleculeNames = molnames;
            firstRow = false;
        }

        ReadScoreAndMass(gR, fields);
        gR.PredictedComposition.CompoundComposition = fields[2].Replace(",", ";");
        gR.Match = IsMatchedComposition(fields[2]);
        gR.PredictedComposition.MassWeight = Convert.ToDouble(fields[4]);
        ReadGroupStatistics(gR, fields, 5);
        gR.DeconRow.ScanNum = Convert.ToInt32(fields[14]);
        gR.MaxScanNum = Convert.ToInt32(fields[15]);
        gR.MinScanNum = Convert.ToInt32(fields[16]);
        gR.PredictedComposition.eqCount = new Dictionary<string, int>();
        for (int k = 17; k < 23; k++)
        {
            gR.PredictedComposition.ElementAmount.Add(Convert.ToInt32(fields[k]));
        }
        // Columns 23..39 hold the seventeen counts stored under the keys "A".."Q".
        for (int k = 0; k < countKeys.Length; k++)
        {
            gR.PredictedComposition.eqCount.Add(countKeys[k].ToString(), Convert.ToInt32(fields[23 + k]));
        }
        gR.PredictedComposition.AddRep = fields[40];
        gR.PredictedComposition.AdductNum = Convert.ToInt32(fields[41]);
        gR.PredictedComposition.PepSequence = fields[42];
        gR.PredictedComposition.PepModification = fields[43];
        gR.PredictedComposition.MissedCleavages = Convert.ToInt32(fields[44]);
        gR.PredictedComposition.NumGlycosylations = Convert.ToInt32(fields[45]);

        results.Add(gR);
    }
}

// Reads the tab-separated ("gly1") layout, which only carries the score and statistics columns.
private static void ReadTabDelimitedRows(StreamReader read, List<String> molnames, List<ResultsGroup> results)
{
    String line;
    while ((line = read.ReadLine()) != null)
    {
        String[] fields = line.Split('\t');
        if (String.IsNullOrEmpty(fields[0]))
        {
            continue;
        }

        ResultsGroup gR = CreateResultsGroup();
        // No molecule names are read from this layout, so every row shares the same (empty) list.
        gR.PredictedComposition.MoleculeNames = molnames;
        ReadScoreAndMass(gR, fields);
        gR.PredictedComposition.CompoundComposition = fields[2].Replace(",", ";");
        if (IsMatchedComposition(fields[2]))
        {
            gR.Match = true;
            gR.PredictedComposition.MassWeight = Convert.ToDouble(fields[4]);
        }
        else
        {
            // Unmatched rows get a mass weight of 0 instead of the value in column 4.
            gR.Match = false;
            gR.PredictedComposition.MassWeight = 0;
        }
        ReadGroupStatistics(gR, fields, 5);
        // Parsed as a double first and then converted, so a scan number written with a decimal point is accepted.
        gR.DeconRow.ScanNum = Convert.ToInt32(Convert.ToDouble(fields[14]));

        results.Add(gR);
    }
}

// Creates a ResultsGroup with a new DeconRow and a new CompositionHypothesisEntry attached.
private static ResultsGroup CreateResultsGroup()
{
    ResultsGroup gR = new ResultsGroup();
    gR.DeconRow = new DeconRow();
    gR.PredictedComposition = new CompositionHypothesisEntry();
    return gR;
}

// Appends header cells to names, starting at index start, until the "Adduct/Replacement" cell is reached.
private static void CollectMoleculeNames(String[] headers, int start, List<String> names)
{
    for (int i = start; headers[i] != "Adduct/Replacement"; i++)
    {
        names.Add(headers[i]);
    }
}

// Converts count consecutive cells, beginning at index start, to integers.
private static List<int> ReadMoleculeCounts(String[] fields, int start, int count)
{
    List<int> eqCoun = new List<int>();
    for (int j = 0; j < count; j++)
    {
        eqCoun.Add(Convert.ToInt32(fields[start + j]));
    }
    return eqCoun;
}

// A row counts as matched when its composition cell (column 2) is neither empty nor "0".
private static bool IsMatchedComposition(String composition)
{
    return !(String.IsNullOrEmpty(composition) || composition == "0");
}

// Reads the two leading columns shared by every layout: score (0) and monoisotopic mass weight (1).
private static void ReadScoreAndMass(ResultsGroup gR, String[] fields)
{
    gR.Score = Convert.ToDouble(fields[0]);
    gR.DeconRow.MonoisotopicMassWeight = Convert.ToDouble(fields[1]);
}

// Reads the nine consecutive statistics columns that begin at index first.
private static void ReadGroupStatistics(ResultsGroup gR, String[] fields, int first)
{
    gR.NumModiStates = Convert.ToDouble(fields[first]);
    gR.NumChargeStates = Convert.ToInt32(fields[first + 1]);
    gR.NumOfScan = Convert.ToDouble(fields[first + 2]);
    gR.ScanDensity = Convert.ToDouble(fields[first + 3]);
    gR.ExpectedA = Convert.ToDouble(fields[first + 4]);
    // The file stores a single value here; it is kept as a one-element list.
    gR.AvgAA2List = new List<double>();
    gR.AvgAA2List.Add(Convert.ToDouble(fields[first + 5]));
    gR.TotalVolume = Convert.ToDouble(fields[first + 6]);
    gR.AvgSigNoise = Convert.ToDouble(fields[first + 7]);
    gR.CentroidScan = Convert.ToDouble(fields[first + 8]);
}