public static List <SpectrumData> ReadKnownMSMSSpectraList(string knownMSMSPath) // read all known compands MSMS data to a list { List <SpectrumData> allSpectrumList = new List <SpectrumData>(); List <string> txtList = ReadFileToList(knownMSMSPath); for (int i = 0; i < txtList.Count - 1; i++) { if (txtList[i].Contains("eV:")) // indecating sampleName line { string[] tittleLine = txtList[i].Split(','); SpectrumData standardSpectrum = new SpectrumData(); List <PeakData> spectrumPeakList = new List <PeakData>(); for (int j = i + 1; j < txtList.Count; j++) { string[] line = txtList[j].Split(default(char[]), StringSplitOptions.RemoveEmptyEntries); if (line.GetLength(0) > 1) { PeakData peak = new PeakData(); peak.mz = double.Parse(line[0]); peak.intensity = double.Parse(line[1]); peak.sampleName = txtList[i].Replace(",", "_"); peak.rt = double.Parse(tittleLine[tittleLine.GetLength(0) - 2].Split(default(char[]), StringSplitOptions.RemoveEmptyEntries)[0]) * 60; spectrumPeakList.Add(peak); } else { break; } } standardSpectrum.peakList = spectrumPeakList; allSpectrumList.Add(standardSpectrum); } } return(allSpectrumList); }
private static SpectrumData AverageIntensityofMZline(SpectrumData allPeakDataInEachGroup) // this method is to calculate average intensity for each mz line with the one 3 times higher than noise in one of the 20-50ev spectrum(calculated by pvals). to create a list containing each mz as list[i] with position of this mz { SpectrumData averageSpectrum = new SpectrumData(); averageSpectrum.peakList = new List <PeakData>(); var mzLines = from peaks in allPeakDataInEachGroup.peakList group peaks by peaks.mz; foreach (var mzline in mzLines) { double sumIntensity = 0, averageIntensity = 0; int n = 0; List <string> peakPositionList = new List <string>(); StringBuilder peakPosition = new StringBuilder(); foreach (var peak in mzline) { if (peak.isNoise == false) { sumIntensity += peak.intensity; peakPositionList.Add(peak.sampleName.Split('_')[0]); n++; } } PeakData averPeak = mzline.ToList <PeakData>()[0]; // choose the first peak in each mz line averPeak.isNoise = true; if (n != 0) { averageIntensity = sumIntensity / n; if (n >= minSampleNrForCor) // minimun samples for correlation analysis { averPeak.isNoise = false; } } for (int i = 0; i < peakPositionList.Count; i++) { if (i == peakPositionList.Count - 1) { peakPosition.Append(peakPositionList[i]); } else { peakPosition.Append(peakPositionList[i] + "_"); } } averPeak.intensity = averageIntensity; averPeak.sampleName = peakPosition.ToString(); //name formats as 53_196_115 averageSpectrum.peakList.Add(averPeak); } averageSpectrum.group = allPeakDataInEachGroup.group; return(averageSpectrum); }
private static bool IsMainPeak(PeakData iso, PeakData main) { bool reslt = false; if (IsIsotope(iso.isotopes) && main.isotopes.Contains("[M]")) { int n = int.Parse(iso.isotopes.Split('+')[1].Replace("]", "")); if (((iso.mz - main.mz) < 1.01 * n) && (iso.mz - main.mz) > 1 * n) { reslt = true; } } return(reslt); }
/** * Merge multiple idMSMS (if mz difference less than 0.01, pick the higher intensity one) * This function is used in two cases:1, merge 20-50ev idMSMSs, 2, merge idMSMSs in the same pcgroup with NDP>threshold */ private static SpectrumData MergeSpectraToOne(List <SpectrumData> spectraWithSamePcgrp) { SpectrumData maxSpectrum = new SpectrumData(); maxSpectrum.peakList = new List <PeakData>(); maxSpectrum.group = spectraWithSamePcgrp.OrderByDescending(x => double.Parse(x.group.Split('_')[0])).ToList()[0].group; //choose the highest mz as precursor(if this is 20-50ev, group is the same, then no influnce) List <PeakData> allPeaksInOneGroup = new List <PeakData>(); for (int i = 0; i < spectraWithSamePcgrp.Count; i++) { allPeaksInOneGroup.AddRange(spectraWithSamePcgrp[i].peakList); } var allpeaks = from peaks in allPeaksInOneGroup group peaks by peaks.mz; foreach (var peaks in allpeaks) { PeakData maxPeak = peaks.OrderByDescending(x => x.intensity).ToList()[0]; //choose the highest intensity peak in one mz maxSpectrum.peakList.Add(maxPeak); } for (int m = 0; m < maxSpectrum.peakList.Count; m++) //remove redundancy(two mz <0.01) { for (int n = m + 1; n < maxSpectrum.peakList.Count; n++) { if (Cal_Cosine_Product.IsEqualMZ(maxSpectrum.peakList[m].mz, maxSpectrum.peakList[n].mz)) { if (maxSpectrum.peakList[m].intensity >= maxSpectrum.peakList[n].intensity) { maxSpectrum.peakList.RemoveAt(n); n = n - 1; } else { maxSpectrum.peakList[m] = maxSpectrum.peakList[n]; n = n - 1; } } } } for (int i = 0; i < maxSpectrum.peakList.Count; i++) //assign max precursor to all peaks { maxSpectrum.peakList[i].precursor = maxSpectrum.group; } maxSpectrum.peakList = maxSpectrum.peakList.OrderBy(x => x.mz).ToList(); return(maxSpectrum); }
private static PeakData AverageIntensityofMZline(List <PeakData> mzPeakList) // this method is to calculate average intensity for each mz line with the one 3 times higher than noise in one of the 20-50ev spectrum(calculated by pvals). to create a list containing each mz as list[i] with position of this mz { PeakData averPeak = new PeakData(); double sumIntensity = 0, averageIntensity = 0; int n = 0; List <string> peakPositionList = new List <string>(); StringBuilder peakPosition = new StringBuilder(); for (int i = 0; i < mzPeakList.Count; i++) { if (mzPeakList[i].isNoise == false) { sumIntensity += mzPeakList[i].intensity; peakPositionList.Add(mzPeakList[i].sampleName.Split('_')[0]); n++; } } averPeak = mzPeakList[0]; // choose the first peak in each mz line averPeak.isNoise = true; if (n != 0) { averageIntensity = sumIntensity / n; if (n >= minSampleNrForCor) // minimun samples for correlation analysis { averPeak.isNoise = false; } } for (int i = 0; i < peakPositionList.Count; i++) { if (i == peakPositionList.Count - 1) { peakPosition.Append(peakPositionList[i]); } else { peakPosition.Append(peakPositionList[i] + "_"); } } averPeak.intensity = averageIntensity; averPeak.sampleName = peakPosition.ToString(); //name formats as 53_196_115 return(averPeak); }
/** * After using this function,list contains a <PeakData> List, each index in the list coresponds to one mz (value averaged) */ public static List <SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath) //read result CSV file generated by XCMS and CAMERA to a list of spectra { List <SpectrumData> allSpectrumList = new List <SpectrumData>(); List <PeakData> allPeakList = new List <PeakData>(); List <string> CSVList = new List <string>(); CSVList = FileProcess.ReadFileToList(csvFilePath); //CSVList = BigFileReader.GetFileContent(csvFilePath); string[] titleLine = null; if (CSVList.Count != 0) // extract tittle row { titleLine = CSVList[0].Split(','); } else { Console.WriteLine("CSVFile is empty!"); } int diffrtIndex = 0, pvalIndex = 0, featureIndex = 0, mzIndex = 4, rtIndex = 5, isotopesIndex = titleLine.GetLength(0) - 4, adductIndex = titleLine.GetLength(0) - 3, pcgroupIndex = titleLine.GetLength(0) - 2, precursorIndex = titleLine.GetLength(0) - 1; //store the index of interested tittles, allocate initial default values. for (int j = 0; j < titleLine.GetLength(0); j++) { titleLine[j] = titleLine[j].Replace("\"", ""); switch (titleLine[j]) { case "mz": mzIndex = j; break; case "rt": rtIndex = j; break; case "isotopes": isotopesIndex = j; break; case "adduct": adductIndex = j; break; case "pcgroup": pcgroupIndex = j; break; case "pval": pvalIndex = j; break; case "feature": featureIndex = j; break; case "diffrt": diffrtIndex = j; break; case "precursor": precursorIndex = j; break; default: break; } } for (int i = 1; i < CSVList.Count; i++) // extract data rows, each time extract 1 row and take title row as a pair. { string[] line = CSVList[i].Replace("\"", "").Split(','); List <PeakData> mzPeaks = new List <PeakData>(); for (int j = 0; j < line.GetLength(0); j++) { if (titleLine[j].Contains("_") && titleLine[j].Length > 1) // all the sample names contain "_" char { PeakData peak = new PeakData(); peak.mz = double.Parse(line[mzIndex]); peak.rt = double.Parse(line[rtIndex]); peak.isotopes = line[isotopesIndex]; peak.adduct = line[adductIndex]; peak.pcgroup = int.Parse(line[pcgroupIndex]); peak.intensity = double.Parse(line[j]); peak.sampleName = titleLine[j]; if (diffrtIndex != 0) //MS doesnt contain pval, feature, diffrt { peak.pval = double.Parse(line[pvalIndex]); peak.feature = double.Parse(line[featureIndex]); peak.diffrt = double.Parse(line[diffrtIndex]); } peak.precursor = line[precursorIndex].Replace(" ", ""); if (double.Parse(line[j]) >= weight * bg) //intensity which is 3 times higher than the average noise will be deemed as real peak! { peak.isNoise = false; } else { peak.isNoise = true; } mzPeaks.Add(peak); } } PeakData averPeak = AverageIntensityofMZline(mzPeaks); allPeakList.Add(averPeak);// after AverageIntensityofMZline, only one peakdata left } var groupResult = from p in allPeakList group p by p.precursor; foreach (var groups in groupResult) { List <PeakData> peakList = new List <PeakData>(); peakList = groups.ToList <PeakData>(); SpectrumData spec = new SpectrumData(); spec.peakList = peakList; //spec contains all original peak data spec.group = groups.Key.Replace(" ", ""); if (peakList.Count != 0) //0 means no peaks, 1means single peak group, should be removed { allSpectrumList.Add(spec); //here merged all samples and CIDs into allSpectrumList } else { continue; } } allSpectrumList = allSpectrumList.OrderBy(x => x.group).ToList(); return(allSpectrumList); }
public static double rtWindow = 9; // rt window to precursor // public static List <SpectrumData> MergeIdMSMSOfDiffCIDs(string rootDataPath) // using idMSMS precursor-product method by calculated pvals, this method will merge 20-50ev data by selecting highest intensity(the peaks are different among CIDs) { List <SpectrumData> mergedSpecList = new List <SpectrumData>(); List <SpectrumData> mergedCIDList = new List <SpectrumData>(); List <SpectrumData> AllCIDList = new List <SpectrumData>(); // all CID List <SpectrumData> CID20evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "20ev")); AllCIDList.AddRange(CID20evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList()); //remove rt<50 CID20evList = null; GC.Collect(); Console.WriteLine("20ev done!"); List <SpectrumData> CID30evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "30ev")); AllCIDList.AddRange(CID30evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList()); CID30evList = null; GC.Collect(); Console.WriteLine("30ev done!"); List <SpectrumData> CID40evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "40ev")); AllCIDList.AddRange(CID40evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList()); CID40evList = null; GC.Collect(); Console.WriteLine("40ev done!"); List <SpectrumData> CID50evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "50ev")); AllCIDList.AddRange(CID50evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList()); CID50evList = null; GC.Collect(); Console.WriteLine("50ev done!"); List <SpectrumData> MSList = ProcessPeaktableToSpectralist(rootDataPath); // Note: pcgroup is different among different CIDs List <PeakData> allMSPeakList = new List <PeakData>(); for (int i = 0; i < MSList.Count; i++) { allMSPeakList.AddRange(MSList[i].peakList.Where(x => x.rt > 50).ToList()); //already averaged, meaning that each PeakData=one mz line } MSList = null; GC.Collect(); var allSpec = from specs in AllCIDList group specs by specs.peakList[0].precursor; foreach (var specs in allSpec) { PeakData precursorPeak = allMSPeakList.Where(x => x.mz == double.Parse(specs.Key.Split('_')[0])).ToList()[0]; // find this precursor in MS double precursorPeakRT = precursorPeak.rt; /** * Filter presursor: 1,precursor should pass the "noise"threshold; 2, isotope is removed from precursor list(but isotopes can be fragments; 3, precursors will meet the criteria that "diffrt"(retention time difference) and "feature"(mass difference) are zero */ List <PeakData> peaksofeachprecursor = new List <PeakData>(); for (int k = 0; k < specs.ToList().Count; k++) { peaksofeachprecursor.AddRange(specs.ToList()[k].peakList); } if (precursorPeak.isNoise == true || IsIsotope(precursorPeak.isotopes) || !IsSelfCor(peaksofeachprecursor)) // determine whether precursor peak contains minimum number of "minSampleNrForCor" or is isotope, if not, not include { continue; } /** * Filter fragments: 1, fragments should not be "noise" peaks,2, fragment's "sample position" should be the subcollection of precursor's position,3, fragment's m/z less than precursor(The highest intensity of fragment is selected eventually,and then calculate its relativeintensity) */ List <string> precursorPeakPosition = precursorPeak.sampleName.Split('_').ToList(); double precursorMZ = precursorPeak.mz; List <PeakData> allPeaksInOneGroup = new List <PeakData>(); List <PeakData> MList = new List <PeakData>(); foreach (var spec in specs) { allPeaksInOneGroup.AddRange(spec.peakList.Where(x => (x.isNoise == false && x.diffrt < rtWindow)).ToList());// determine whether fragment peak contains minimum number of "minSampleNrForCor", if not, not include } for (int i = 0; i < allPeaksInOneGroup.Count; i++) { List <string> fragmentPeakPosition = allPeaksInOneGroup[i].sampleName.Split('_').ToList(); double fragmentMZ = allPeaksInOneGroup[i].mz; //until here, the higher mz is not removed, so fragment mz can be higher than precursor mz here if (precursorPeakPosition.Intersect(fragmentPeakPosition).Count() < minSampleNrForCor) // min samples for correlation { if (allPeaksInOneGroup[i].isotopes.Contains("[M]")) //after saving [M]+, check whether it is in the isotope list,if so, add this [M]+ { MList.Add(allPeaksInOneGroup[i]); } allPeaksInOneGroup.RemoveAt(i); i = i - 1; } } for (int i = 0; i < MList.Count; i++) { for (int j = 0; j < allPeaksInOneGroup.Count; j++) { if (IsMainPeak(allPeaksInOneGroup[j], MList[i])) { allPeaksInOneGroup.Add(MList[i]); break; } } } SpectrumData maxSpectrum = new SpectrumData(); maxSpectrum.peakList = allPeaksInOneGroup; maxSpectrum.group = specs.Key; List <SpectrumData> sd = new List <SpectrumData>(); sd.Add(maxSpectrum); maxSpectrum = MergeSpectraToOne(sd); // merge 20-50ev maxSpectrum.peakList = maxSpectrum.peakList.OrderBy(x => x.mz).ToList(); if (maxSpectrum.peakList.Count != 0) { mergedCIDList.Add(CalRelativeIntensity(maxSpectrum)); //calculate relative intensity } } AllCIDList = null; GC.Collect(); Console.WriteLine("Filter done!"); /** * merging sub-idMS/MS function: merge those idMSMSs which are in the same pcgroup && ndp>thresholdNDP, then remove higher mz than precursor, calculate relative intensity, calculate neutral loss */ int n = 0; var pcgrps = mergedCIDList.GroupBy(x => int.Parse(x.group.Split('_')[2])); //same pcgroup foreach (var specs in pcgrps) { List <SpectrumData> specList = specs.ToList(); if (specList.Count > 1) //check if the pcgroup contains nore than one idMSMS { List <SpectrumData> similarList = new List <SpectrumData>(); specList = specList.OrderBy(x => double.Parse(x.group.Split('_')[0])).ToList();//ascending order mz, merge from bottom up for (int i = 0; i < specList.Count - 1; i++) { PeakData precursorPeak = allMSPeakList.Where(x => x.mz == double.Parse(specList[i].group.Split('_')[0])).ToList()[0]; // find this precursor in MS if (!Data_Operation.IsPrecursor(precursorPeak.adduct)) // if the mz is not precursor, merge upwards { for (int j = i + 1; j < specList.Count; j++) { double ndp = 0; ndp = Cal_Cosine_Product.CalCosineProduct(specList[i], specList[j]); if (ndp > ndpThreshold) { List <SpectrumData> candiList = new List <SpectrumData>(); candiList.Add(specList[i]); candiList.Add(specList[j]); SpectrumData MeSpec = MergeSpectraToOne(candiList); specList[j] = MeSpec; //merge a and b (merged result is stored in b and delete a) specList.RemoveAt(i); i--; n++; break; } } } } } for (int i = 0; i < specList.Count; i++) { SpectrumData removeSpec = RemoveMZHigherThanPrecursor(specList[i]); //remove higher mz than precursor if (removeSpec.peakList.Count != 0) { SpectrumData finalSpec = CalRelativeIntensity(removeSpec);// cal relative intensity finalSpec.peakList = finalSpec.peakList.OrderByDescending(x => x.mz).ToList(); finalSpec.neutralLossList = Data_Operation.FindNeutralLoss(finalSpec); mergedSpecList.Add(finalSpec); } } } Console.WriteLine("n:" + n); Console.WriteLine("merge done!"); return(mergedSpecList); }
private static SpectrumData CalMaxAverageIntensity(SpectrumData allRelativeIntensity) // calculate maximum average intensity { SpectrumData maxAverageSpectrum = new SpectrumData(); maxAverageSpectrum.peakList = new List <PeakData>(); List <PeakData> spectrumList = new List <PeakData>(); var mz = from peaks in allRelativeIntensity.peakList group peaks by peaks.mz; foreach (var peaks in mz) { PeakData maxPeak = new PeakData(); List <PeakData> ev20 = new List <PeakData>(); //store 20ev samples List <PeakData> ev30 = new List <PeakData>(); List <PeakData> ev40 = new List <PeakData>(); List <PeakData> ev50 = new List <PeakData>(); List <PeakData> ms = new List <PeakData>(); foreach (var peak in peaks) { if (peak.sampleName.Contains("20eV")) { ev20.Add(peak); } if (peak.sampleName.Contains("30eV")) { ev30.Add(peak); } if (peak.sampleName.Contains("40eV")) { ev40.Add(peak); } if (peak.sampleName.Contains("50eV")) { ev50.Add(peak); } if (peak.sampleName.Contains(".Ms.")) { ms.Add(peak); } } List <PeakData> mzAverageList = new List <PeakData>(); // store average intensity if (ev20.Count != 0) { ev20[0].intensity = ev20.Average(x => x.intensity); mzAverageList.Add(ev20[0]); } if (ev30.Count != 0) { ev30[0].intensity = ev30.Average(x => x.intensity); mzAverageList.Add(ev30[0]); } if (ev40.Count != 0) { ev40[0].intensity = ev40.Average(x => x.intensity); mzAverageList.Add(ev40[0]); } if (ev50.Count != 0) { ev50[0].intensity = ev50.Average(x => x.intensity); mzAverageList.Add(ev50[0]); } if (ms.Count != 0) { ms[0].intensity = ms.Average(x => x.intensity); mzAverageList.Add(ms[0]); } maxPeak = mzAverageList[0]; maxPeak.intensity = mzAverageList.Max(x => x.intensity); maxPeak.sampleName = "MergedSpectrum" + maxPeak.pcgroup.ToString(); spectrumList.Add(maxPeak); } maxAverageSpectrum.peakList = spectrumList; maxAverageSpectrum.group = spectrumList[0].precursor; return(maxAverageSpectrum); }