Ejemplo n.º 1
0
        /// <summary>
        /// Collects the neutral losses (m/z differences) of a spectrum.
        /// Peaks flagged by <c>IdMSMS_Deconvolution.IsIsotope</c> are ignored. Losses are taken first between
        /// the precursor m/z and every remaining peak, then between every pair of remaining peaks
        /// (higher m/z minus lower m/z). Only losses greater than 14 are kept.
        /// </summary>
        /// <param name="spectrum">Spectrum whose <c>group</c> string starts with the precursor m/z followed by '_'.</param>
        /// <returns>The kept neutral losses with exact duplicates removed.</returns>
        public static List<double> FindNeutralLoss(SpectrumData spectrum)
        {
            List<double> losses = new List<double>();

            // Peaks from highest to lowest m/z, isotope peaks filtered out.
            List<PeakData> candidates = spectrum.peakList
                                        .OrderByDescending(p => p.mz)
                                        .Where(p => !IdMSMS_Deconvolution.IsIsotope(p.isotopes))
                                        .ToList();
            double precursorMz = double.Parse(spectrum.group.Split('_')[0]);

            // Losses relative to the precursor.
            foreach (PeakData peak in candidates)
            {
                double fromPrecursor = precursorMz - peak.mz;
                if (fromPrecursor > 14)
                {
                    losses.Add(fromPrecursor);
                }
            }

            // Losses between each peak and every peak that follows it in the descending list.
            for (int upper = 0; upper < candidates.Count; upper++)
            {
                for (int lower = upper + 1; lower < candidates.Count; lower++)
                {
                    double betweenPeaks = candidates[upper].mz - candidates[lower].mz;
                    if (betweenPeaks > 14)
                    {
                        losses.Add(betweenPeaks);
                    }
                }
            }

            return losses.Distinct().ToList();
        }
        /// <summary>
        /// Reads the known-compound MS/MS text file into a list of spectra.
        /// Every line containing "eV:" is treated as a title line. The lines after it are read as
        /// whitespace-separated "mz intensity" rows until a line with fewer than two fields is reached.
        /// </summary>
        /// <param name="knownMSMSPath">Path handed to <c>ReadFileToList</c>.</param>
        /// <returns>One spectrum per title line; only its <c>peakList</c> is filled in.</returns>
        public static List<SpectrumData> ReadKnownMSMSSpectraList(string knownMSMSPath)
        {
            List<SpectrumData> spectra = new List<SpectrumData>();
            List<string> lines = ReadFileToList(knownMSMSPath);

            // The scan stops before the final line, so a title on the last line produces no spectrum.
            for (int titleIndex = 0; titleIndex < lines.Count - 1; titleIndex++)
            {
                string title = lines[titleIndex];
                if (!title.Contains("eV:"))
                {
                    continue;
                }

                string[] titleFields = title.Split(',');
                SpectrumData spectrum = new SpectrumData();
                List<PeakData> peaks = new List<PeakData>();

                int rowIndex = titleIndex + 1;
                while (rowIndex < lines.Count)
                {
                    string[] fields = lines[rowIndex].Split(default(char[]), StringSplitOptions.RemoveEmptyEntries);
                    if (fields.Length <= 1)
                    {
                        // A row without at least two fields ends this spectrum's peak block.
                        break;
                    }

                    PeakData peak = new PeakData();
                    peak.mz = double.Parse(fields[0]);
                    peak.intensity = double.Parse(fields[1]);
                    peak.sampleName = title.Replace(",", "_");

                    // Retention time: first token of the second-to-last comma field of the title, times 60
                    // (presumably minutes converted to seconds - confirm against the file format).
                    string rtField = titleFields[titleFields.Length - 2];
                    peak.rt = double.Parse(rtField.Split(default(char[]), StringSplitOptions.RemoveEmptyEntries)[0]) * 60;

                    peaks.Add(peak);
                    rowIndex++;
                }

                spectrum.peakList = peaks;
                spectra.Add(spectrum);
            }

            return spectra;
        }
        /// <summary>
        /// Scores the similarity of two spectra as a normalized dot product:
        /// (sum of Wq*Wl over the matched peak pairs)^2 / (sum of Wq^2 over all query peaks * sum of Wl^2 over all library peaks),
        /// where W is the value returned by <c>CalWeight(intensity, mz)</c>.
        /// </summary>
        /// <param name="querySpec">Query spectrum.</param>
        /// <param name="librarySpec">Library spectrum.</param>
        /// <returns>The score rounded to 3 decimals, or 0 when the denominator is 0.</returns>
        public static double CalCosineProduct(SpectrumData querySpec, SpectrumData librarySpec)
        {
            // Element 0 holds the matched query peaks, element 1 the matched library peaks, aligned by index.
            List<SpectrumData> matched = GetCommonSpectrum(querySpec, librarySpec);

            double dot = 0;
            for (int k = 0; k < matched[0].peakList.Count; k++)
            {
                dot += CalWeight(matched[1].peakList[k].intensity, matched[1].peakList[k].mz)
                       * CalWeight(matched[0].peakList[k].intensity, matched[0].peakList[k].mz);
            }

            // The norms run over ALL peaks of each spectrum, not only the matched ones.
            double queryNorm = 0;
            foreach (PeakData peak in querySpec.peakList)
            {
                queryNorm += Math.Pow(CalWeight(peak.intensity, peak.mz), 2);
            }

            double libraryNorm = 0;
            foreach (PeakData peak in librarySpec.peakList)
            {
                libraryNorm += Math.Pow(CalWeight(peak.intensity, peak.mz), 2);
            }

            if (queryNorm * libraryNorm == 0)
            {
                return 0;
            }

            return Math.Round((dot * dot) / (queryNorm * libraryNorm), 3);
        }
        /// <summary>
        /// Pairs up the peaks shared by two spectra. For each library peak, in order, the first query peak
        /// for which <c>IsEqualMZ</c> returns true becomes its partner (the original notes describe this as
        /// an m/z difference within 0.01 Da). The same query peak may be paired with several library peaks.
        /// </summary>
        /// <param name="querySpec">Query spectrum.</param>
        /// <param name="librarySpec">Library spectrum.</param>
        /// <returns>A two-element list: [0] the matched query peaks, [1] the matched library peaks, aligned by index.</returns>
        private static List<SpectrumData> GetCommonSpectrum(SpectrumData querySpec, SpectrumData librarySpec)
        {
            SpectrumData matchedQuery = new SpectrumData();
            matchedQuery.peakList = new List<PeakData>();

            SpectrumData matchedLibrary = new SpectrumData();
            matchedLibrary.peakList = new List<PeakData>();

            foreach (PeakData libraryPeak in librarySpec.peakList)
            {
                foreach (PeakData queryPeak in querySpec.peakList)
                {
                    if (IsEqualMZ(libraryPeak.mz, queryPeak.mz))
                    {
                        matchedQuery.peakList.Add(queryPeak);
                        matchedLibrary.peakList.Add(libraryPeak);
                        break; // only the first matching query peak is used
                    }
                }
            }

            // Query first, library second.
            List<SpectrumData> pair = new List<SpectrumData> { matchedQuery, matchedLibrary };

            // Sanity check; both lists are always appended together above.
            if (matchedQuery.peakList.Count != matchedLibrary.peakList.Count)
            {
                Console.WriteLine("Common spectrum peak list is not equal between Query and Library!");
            }

            return pair;
        }
        /// <summary>
        /// Builds the merged spectrum of one group: the peaks are first passed through
        /// <c>IdMSMS_Deconvolution.CalRelativeIntensity</c> and the result is reduced by
        /// <c>CalMaxAverageIntensity</c>.
        /// </summary>
        /// <param name="allPeakDataInEachGroup">All peaks of the group.</param>
        /// <returns>The spectrum returned by <c>CalMaxAverageIntensity</c>.</returns>
        private static SpectrumData GenerateMergedSpectrum(SpectrumData allPeakDataInEachGroup)
        {
            SpectrumData relative = IdMSMS_Deconvolution.CalRelativeIntensity(allPeakDataInEachGroup);
            return CalMaxAverageIntensity(relative);
        }
        /// <summary>
        /// Drops every peak whose m/z is above the precursor m/z. The precursor m/z is the first
        /// '_'-separated field of <c>spec.group</c>.
        /// </summary>
        /// <param name="spec">Spectrum to filter; its <c>peakList</c> is replaced by the filtered list.</param>
        /// <returns>The passed-in spectrum, now holding only peaks with m/z at or below the precursor.</returns>
        private static SpectrumData RemoveMZHigherThanPrecursor(SpectrumData spec)
        {
            List<PeakData> peaks = spec.peakList;

            // Parse the precursor m/z once; it used to be re-parsed from the group string for every peak.
            double precursorMz = double.Parse(spec.group.Split('_')[0]);

            spec.peakList = peaks.Where(x => x.mz <= precursorMz).ToList();
            return spec;
        }
        /// <summary>
        /// Rescales the intensities of a spectrum in place so that the most intense peak becomes 100.
        /// </summary>
        /// <param name="allPeakDataInEachGroup">Spectrum whose peaks are rescaled.</param>
        /// <returns>
        /// The passed-in spectrum. It is returned unchanged when it has no peaks or when its highest
        /// intensity is 0, because there is nothing meaningful to scale against in either case.
        /// </returns>
        public static SpectrumData CalRelativeIntensity(SpectrumData allPeakDataInEachGroup)
        {
            List<PeakData> peaks = allPeakDataInEachGroup.peakList;

            // Enumerable.Max throws on an empty sequence, so an empty spectrum is returned as is.
            if (peaks == null || peaks.Count == 0)
            {
                return allPeakDataInEachGroup;
            }

            double maxIntensity = peaks.Max(x => x.intensity);

            // Dividing by a zero maximum would turn the intensities into NaN/Infinity.
            if (maxIntensity == 0)
            {
                return allPeakDataInEachGroup;
            }

            foreach (PeakData peak in peaks)
            {
                peak.intensity = 100 * peak.intensity / maxIntensity;
            }

            return allPeakDataInEachGroup;
        }
        /// <summary>
        /// Collapses a group's peaks to one averaged peak per distinct m/z value.
        /// For each m/z the intensities of the non-noise peaks are averaged, and the first peak of that m/z is
        /// reused as the result: its intensity becomes the average (0 when every peak is noise), its
        /// <c>sampleName</c> becomes the '_'-joined list of the first '_'-separated token of each contributing
        /// peak's sample name (e.g. "53_196_115"), and it is flagged as noise unless at least
        /// <c>minSampleNrForCor</c> peaks contributed.
        /// </summary>
        /// <param name="allPeakDataInEachGroup">Spectrum holding the raw per-sample peaks of one group.</param>
        /// <returns>A new spectrum with the same <c>group</c> and one peak per m/z.</returns>
        private static SpectrumData AverageIntensityofMZline(SpectrumData allPeakDataInEachGroup)
        {
            SpectrumData averaged = new SpectrumData();
            averaged.peakList = new List<PeakData>();

            foreach (var sameMz in allPeakDataInEachGroup.peakList.GroupBy(p => p.mz))
            {
                double intensitySum = 0;
                List<string> positions = new List<string>();

                foreach (PeakData peak in sameMz)
                {
                    if (peak.isNoise == false)
                    {
                        intensitySum += peak.intensity;
                        positions.Add(peak.sampleName.Split('_')[0]);
                    }
                }

                int contributing = positions.Count;
                double average = 0;
                if (contributing != 0)
                {
                    average = intensitySum / contributing;
                }

                // The first peak of the m/z line carries the aggregated values.
                PeakData representative = sameMz.First();

                // Minimum number of samples needed for the correlation analysis.
                bool enoughSamples = contributing != 0 && contributing >= minSampleNrForCor;
                representative.isNoise = !enoughSamples;
                representative.intensity = average;
                representative.sampleName = string.Join("_", positions.ToArray());

                averaged.peakList.Add(representative);
            }

            averaged.group = allPeakDataInEachGroup.group;
            return averaged;
        }
        /// <summary>
        /// Removes, in place, every peak whose retention time is below 50. The original note describes
        /// these as calibration peaks eluting before 50 s.
        /// </summary>
        /// <param name="allPeakDataInEachGroup">Spectrum whose <c>peakList</c> is filtered.</param>
        /// <returns>The passed-in spectrum after filtering.</returns>
        private static SpectrumData RemoveLessThan50s(SpectrumData allPeakDataInEachGroup)
        {
            // RemoveAll filters in one pass; the earlier RemoveAt loop shifted the list tail on every hit
            // and had to step its index back by hand.
            allPeakDataInEachGroup.peakList.RemoveAll(peak => peak.rt < 50);
            return allPeakDataInEachGroup;
        }
        /// <summary>
        /// Removes, in place, every peak whose <c>isotopes</c> annotation is recognised by <c>IsIsotope</c>.
        /// The original note gives the motivation: dropping the [M+1]+ .. [M+4]+ isotope peaks raises the NDP.
        /// </summary>
        /// <param name="allPeakDataInEachGroup">Spectrum whose <c>peakList</c> is filtered.</param>
        /// <returns>The passed-in spectrum after filtering.</returns>
        private static SpectrumData RemoveIsotopes(SpectrumData allPeakDataInEachGroup)
        {
            // RemoveAll filters in one pass; the earlier RemoveAt loop shifted the list tail on every hit
            // and had to step its index back by hand.
            allPeakDataInEachGroup.peakList.RemoveAll(peak => IsIsotope(peak.isotopes));
            return allPeakDataInEachGroup;
        }
        /// <summary>
        /// Merges several idMS/MS spectra into one. Peaks with exactly the same m/z are reduced to the most
        /// intense one; peaks that <c>Cal_Cosine_Product.IsEqualMZ</c> treats as equal (the original note
        /// says an m/z difference below 0.01) are then reduced to the more intense of the two.
        /// Per the original note this is used in two cases: merging the 20-50 eV idMS/MS spectra, and merging
        /// idMS/MS spectra of one pcgroup whose NDP exceeds the threshold.
        /// </summary>
        /// <param name="spectraWithSamePcgrp">Spectra to merge; must contain at least one spectrum.</param>
        /// <returns>
        /// A new spectrum, sorted by ascending m/z, whose <c>group</c> is that of the input spectrum with the
        /// highest precursor m/z. The <c>precursor</c> of every kept peak is overwritten with that group.
        /// </returns>
        private static SpectrumData MergeSpectraToOne(List<SpectrumData> spectraWithSamePcgrp)
        {
            SpectrumData merged = new SpectrumData();
            merged.peakList = new List<PeakData>();

            // The spectrum with the highest precursor m/z names the merged result. For the 20-50 eV case
            // all groups are identical, so this choice has no effect there.
            merged.group = spectraWithSamePcgrp
                           .OrderByDescending(x => double.Parse(x.group.Split('_')[0]))
                           .ToList()[0].group;

            List<PeakData> allPeaks = new List<PeakData>();
            foreach (SpectrumData spectrum in spectraWithSamePcgrp)
            {
                allPeaks.AddRange(spectrum.peakList);
            }

            // Exact m/z duplicates: keep the most intense peak of each m/z.
            foreach (var sameMz in allPeaks.GroupBy(p => p.mz))
            {
                merged.peakList.Add(sameMz.OrderByDescending(x => x.intensity).First());
            }

            // Near duplicates: of two peaks considered equal in m/z, keep the more intense one in slot m.
            List<PeakData> kept = merged.peakList;
            for (int m = 0; m < kept.Count; m++)
            {
                for (int n = m + 1; n < kept.Count; n++)
                {
                    if (!Cal_Cosine_Product.IsEqualMZ(kept[m].mz, kept[n].mz))
                    {
                        continue;
                    }

                    if (kept[n].intensity > kept[m].intensity)
                    {
                        kept[m] = kept[n];
                    }

                    // Remove the duplicate right away. The earlier version left it in place after promoting
                    // it and relied on the next pass to delete it, which never terminated when an intensity
                    // was NaN (NaN >= NaN is false).
                    kept.RemoveAt(n);
                    n--; // the element that slid into position n still has to be examined
                }
            }

            // Every surviving peak is labelled with the merged precursor.
            foreach (PeakData peak in kept)
            {
                peak.precursor = merged.group;
            }

            merged.peakList = kept.OrderBy(x => x.mz).ToList();
            return merged;
        }
        /// <summary>
        /// For every distinct m/z, averages the intensities separately for each acquisition label found in the
        /// peaks' <c>sampleName</c> ("20eV", "30eV", "40eV", "50eV", ".Ms.") and keeps the largest of those
        /// averages. The first peak of each label bucket is reused to carry that bucket's average.
        /// </summary>
        /// <param name="allRelativeIntensity">Spectrum holding the relative-intensity peaks of one group.</param>
        /// <returns>
        /// A new spectrum with one peak per m/z, named "MergedSpectrum" + pcgroup, whose <c>group</c> is the
        /// precursor of its first peak.
        /// </returns>
        private static SpectrumData CalMaxAverageIntensity(SpectrumData allRelativeIntensity)
        {
            string[] acquisitionLabels = { "20eV", "30eV", "40eV", "50eV", ".Ms." };
            List<PeakData> mergedPeaks = new List<PeakData>();

            foreach (var sameMz in allRelativeIntensity.peakList.GroupBy(p => p.mz))
            {
                // One representative peak per label present at this m/z, holding that label's average.
                List<PeakData> labelAverages = new List<PeakData>();
                foreach (string label in acquisitionLabels)
                {
                    List<PeakData> withLabel = sameMz.Where(p => p.sampleName.Contains(label)).ToList();
                    if (withLabel.Count != 0)
                    {
                        withLabel[0].intensity = withLabel.Average(x => x.intensity);
                        labelAverages.Add(withLabel[0]);
                    }
                }

                // The first representative is reused as the merged peak and receives the highest average.
                PeakData best = labelAverages[0];
                best.intensity = labelAverages.Max(x => x.intensity);
                best.sampleName = "MergedSpectrum" + best.pcgroup.ToString();
                mergedPeaks.Add(best);
            }

            SpectrumData result = new SpectrumData();
            result.peakList = mergedPeaks;
            result.group = mergedPeaks[0].precursor;
            return result;
        }
        /// <summary>
        /// Reads a peak-table CSV (per the original note, the result file generated by XCMS and CAMERA) and
        /// turns it into a list of spectra, one per distinct precursor value.
        /// Each data row is expanded into one <c>PeakData</c> per sample column (a column whose header
        /// contains '_' and is longer than one character). The row's peaks are then collapsed into a single
        /// averaged peak by <c>AverageIntensityofMZline</c>, and the averaged peaks are grouped by precursor.
        /// </summary>
        /// <param name="csvFilePath">Path handed to <c>FileProcess.ReadFileToList</c>.</param>
        /// <returns>The spectra ordered by <c>group</c>; an empty list when the file contains no lines.</returns>
        public static List<SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath)
        {
            List<SpectrumData> allSpectrumList = new List<SpectrumData>();
            List<string> csvLines = FileProcess.ReadFileToList(csvFilePath);

            if (csvLines.Count == 0)
            {
                // Without a title row there is nothing to index. The earlier version printed this message
                // and then dereferenced the missing title row, failing with a NullReferenceException.
                Console.WriteLine("CSVFile is empty!");
                return allSpectrumList;
            }

            string[] titleLine = csvLines[0].Split(',');

            // Fallback column positions, overridden below whenever the matching header is present.
            int mzIndex = 4, rtIndex = 5;
            int isotopesIndex = titleLine.Length - 4;
            int adductIndex = titleLine.Length - 3;
            int pcgroupIndex = titleLine.Length - 2;
            int precursorIndex = titleLine.Length - 1;
            // These stay 0 when the table has no such columns (the original note says the MS table lacks them).
            int diffrtIndex = 0, pvalIndex = 0, featureIndex = 0;

            for (int j = 0; j < titleLine.Length; j++)
            {
                titleLine[j] = titleLine[j].Replace("\"", "");
                switch (titleLine[j])
                {
                    case "mz":
                        mzIndex = j;
                        break;

                    case "rt":
                        rtIndex = j;
                        break;

                    case "isotopes":
                        isotopesIndex = j;
                        break;

                    case "adduct":
                        adductIndex = j;
                        break;

                    case "pcgroup":
                        pcgroupIndex = j;
                        break;

                    case "pval":
                        pvalIndex = j;
                        break;

                    case "feature":
                        featureIndex = j;
                        break;

                    case "diffrt":
                        diffrtIndex = j;
                        break;

                    case "precursor":
                        precursorIndex = j;
                        break;

                    default:
                        break;
                }
            }

            List<PeakData> allPeakList = new List<PeakData>();

            // Data rows start after the title row; each row describes one m/z line across all samples.
            for (int i = 1; i < csvLines.Count; i++)
            {
                string[] line = csvLines[i].Replace("\"", "").Split(',');
                List<PeakData> mzPeaks = new List<PeakData>();

                for (int j = 0; j < line.Length; j++)
                {
                    // Sample columns are recognised by a '_' in their header.
                    if (!(titleLine[j].Contains("_") && titleLine[j].Length > 1))
                    {
                        continue;
                    }

                    double intensity;
                    PeakData peak = new PeakData();
                    peak.mz = double.Parse(line[mzIndex]);
                    peak.rt = double.Parse(line[rtIndex]);
                    peak.isotopes = line[isotopesIndex];
                    peak.adduct = line[adductIndex];
                    peak.pcgroup = int.Parse(line[pcgroupIndex]);
                    intensity = double.Parse(line[j]);
                    peak.intensity = intensity;
                    peak.sampleName = titleLine[j];
                    if (diffrtIndex != 0)
                    {
                        // Only tables that carry a "diffrt" column also provide pval and feature.
                        peak.pval = double.Parse(line[pvalIndex]);
                        peak.feature = double.Parse(line[featureIndex]);
                        peak.diffrt = double.Parse(line[diffrtIndex]);
                    }
                    peak.precursor = line[precursorIndex].Replace(" ", "");

                    // A value of at least weight * bg counts as a real peak, anything else as noise
                    // (the original note describes the threshold as 3 times the average noise).
                    peak.isNoise = !(intensity >= weight * bg);
                    mzPeaks.Add(peak);
                }

                // NOTE(review): this relies on an AverageIntensityofMZline overload that accepts a
                // List<PeakData> and returns a single PeakData; confirm it exists elsewhere in the class.
                PeakData averPeak = AverageIntensityofMZline(mzPeaks);
                allPeakList.Add(averPeak);
            }

            // One spectrum per precursor; GroupBy never yields an empty group, so every group is kept.
            foreach (var precursorGroup in allPeakList.GroupBy(p => p.precursor))
            {
                SpectrumData spec = new SpectrumData();
                spec.peakList = precursorGroup.ToList();
                spec.group = precursorGroup.Key.Replace(" ", "");
                allSpectrumList.Add(spec);
            }

            return allSpectrumList.OrderBy(x => x.group).ToList();
        }
        // Retention-time window relative to the precursor: MergeIdMSMSOfDiffCIDs keeps a fragment peak only
        // when its diffrt is below this value.
        // NOTE(review): the unit is not stated here; rt values elsewhere in this file are compared against 50 "s" - confirm.
        public static double rtWindow       = 9;

        /// <summary>
        /// Builds the final idMS/MS spectra from the 20/30/40/50 eV peak tables plus the MS peak table.
        /// Steps, in order:
        /// 1. Load the four CID tables and the MS table with <c>ProcessPeaktableToSpectralist</c>, keeping only
        ///    entries whose retention time is above 50.
        /// 2. Group the CID spectra by precursor, look the precursor up in the MS peaks, and filter both the
        ///    precursor and its fragment peaks.
        /// 3. Merge the surviving peaks of each precursor with <c>MergeSpectraToOne</c> and convert them to
        ///    relative intensities.
        /// 4. Within each pcgroup, merge spectra whose NDP exceeds <c>ndpThreshold</c>, drop peaks above the
        ///    precursor m/z, recompute relative intensities and compute the neutral losses.
        /// </summary>
        /// <param name="rootDataPath">
        /// Path of the MS peak table. The CID tables are located by replacing every "MS" in this path with
        /// "20ev", "30ev", "40ev" and "50ev".
        /// </param>
        /// <returns>The processed spectra, each sorted by descending m/z with its neutral-loss list filled in.</returns>
        public static List <SpectrumData> MergeIdMSMSOfDiffCIDs(string rootDataPath)
        {
            List <SpectrumData> mergedSpecList = new List <SpectrumData>();
            List <SpectrumData> mergedCIDList  = new List <SpectrumData>();
            List <SpectrumData> AllCIDList     = new List <SpectrumData>(); // spectra of all four collision energies
            List <SpectrumData> CID20evList    = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "20ev"));

            // The second '_'-separated field of group is compared against 50 (the original note: remove rt < 50).
            AllCIDList.AddRange(CID20evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList());
            CID20evList = null;
            GC.Collect();
            Console.WriteLine("20ev done!");
            List <SpectrumData> CID30evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "30ev"));

            AllCIDList.AddRange(CID30evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList());
            CID30evList = null;
            GC.Collect();
            Console.WriteLine("30ev done!");
            List <SpectrumData> CID40evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "40ev"));

            AllCIDList.AddRange(CID40evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList());
            CID40evList = null;
            GC.Collect();
            Console.WriteLine("40ev done!");
            List <SpectrumData> CID50evList = ProcessPeaktableToSpectralist(rootDataPath.Replace("MS", "50ev"));

            AllCIDList.AddRange(CID50evList.Where(x => double.Parse(x.group.Split('_')[1]) > 50).ToList());
            CID50evList = null;
            GC.Collect();
            Console.WriteLine("50ev done!");
            List <SpectrumData> MSList        = ProcessPeaktableToSpectralist(rootDataPath); // Note from the original author: pcgroup differs between CIDs
            List <PeakData>     allMSPeakList = new List <PeakData>();

            // Flatten the MS spectra into one peak list, keeping peaks with rt > 50.
            for (int i = 0; i < MSList.Count; i++)
            {
                allMSPeakList.AddRange(MSList[i].peakList.Where(x => x.rt > 50).ToList()); // already averaged: each PeakData stands for one m/z line
            }
            MSList = null;
            GC.Collect();
            // Group the CID spectra by the precursor string of their first peak.
            var allSpec = from specs in AllCIDList
                          group specs by specs.peakList[0].precursor;

            foreach (var specs in allSpec)
            {
                // Find this precursor among the MS peaks by exact m/z equality with the first field of the key.
                // NOTE(review): the [0] index throws when no MS peak has exactly that m/z - confirm this cannot happen.
                PeakData precursorPeak   = allMSPeakList.Where(x => x.mz == double.Parse(specs.Key.Split('_')[0])).ToList()[0];
                double   precursorPeakRT = precursorPeak.rt;

                /**
                 * Precursor filter (original author's description): 1, the precursor must pass the "noise" threshold;
                 * 2, isotopes are removed from the precursor list (isotopes can still be fragments);
                 * 3, the precursor must meet the criterion that "diffrt" (retention time difference) and
                 * "feature" (mass difference) are zero.
                 */
                List <PeakData> peaksofeachprecursor = new List <PeakData>();
                for (int k = 0; k < specs.ToList().Count; k++)
                {
                    peaksofeachprecursor.AddRange(specs.ToList()[k].peakList);
                }
                // Skip the precursor when it is noise, is an isotope, or IsSelfCor rejects its peaks.
                if (precursorPeak.isNoise == true || IsIsotope(precursorPeak.isotopes) || !IsSelfCor(peaksofeachprecursor))
                {
                    continue;
                }

                /**
                 * Fragment filter (original author's description): 1, fragments must not be "noise" peaks;
                 * 2, a fragment's "sample position" should be a sub-collection of the precursor's positions;
                 * 3, the fragment's m/z is less than the precursor's (the highest intensity of the fragment is
                 * selected eventually, and then its relative intensity is calculated).
                 */
                List <string>   precursorPeakPosition = precursorPeak.sampleName.Split('_').ToList();
                double          precursorMZ           = precursorPeak.mz;
                List <PeakData> allPeaksInOneGroup    = new List <PeakData>();
                List <PeakData> MList = new List <PeakData>();
                foreach (var spec in specs)
                {
                    // Keep only non-noise fragment peaks whose diffrt is below rtWindow.
                    allPeaksInOneGroup.AddRange(spec.peakList.Where(x => (x.isNoise == false && x.diffrt < rtWindow)).ToList());
                }
                for (int i = 0; i < allPeaksInOneGroup.Count; i++)
                {
                    List <string> fragmentPeakPosition = allPeaksInOneGroup[i].sampleName.Split('_').ToList();
                    double        fragmentMZ           = allPeaksInOneGroup[i].mz;                         // peaks above the precursor m/z have not been removed yet at this point
                    // Drop fragments that share fewer than minSampleNrForCor sample positions with the precursor.
                    if (precursorPeakPosition.Intersect(fragmentPeakPosition).Count() < minSampleNrForCor)
                    {
                        // Dropped peaks annotated "[M]" are set aside so they can be re-added below.
                        if (allPeaksInOneGroup[i].isotopes.Contains("[M]"))
                        {
                            MList.Add(allPeaksInOneGroup[i]);
                        }
                        allPeaksInOneGroup.RemoveAt(i);
                        i = i - 1; // stay on the same index after the removal
                    }
                }
                // Re-add a set-aside "[M]" peak when IsMainPeak matches it to one of the kept peaks.
                for (int i = 0; i < MList.Count; i++)
                {
                    for (int j = 0; j < allPeaksInOneGroup.Count; j++)
                    {
                        if (IsMainPeak(allPeaksInOneGroup[j], MList[i]))
                        {
                            allPeaksInOneGroup.Add(MList[i]);
                            break;
                        }
                    }
                }
                SpectrumData maxSpectrum = new SpectrumData();
                maxSpectrum.peakList = allPeaksInOneGroup;
                maxSpectrum.group    = specs.Key;
                List <SpectrumData> sd = new List <SpectrumData>();
                sd.Add(maxSpectrum);
                maxSpectrum          = MergeSpectraToOne(sd); // merges the 20-50 eV peaks of this precursor
                maxSpectrum.peakList = maxSpectrum.peakList.OrderBy(x => x.mz).ToList();
                if (maxSpectrum.peakList.Count != 0)
                {
                    mergedCIDList.Add(CalRelativeIntensity(maxSpectrum)); // store with relative intensities
                }
            }
            AllCIDList = null;
            GC.Collect();
            Console.WriteLine("Filter done!");

            /**
             * Merging of sub-idMS/MS spectra (original author's description): merge the idMS/MS spectra that are
             * in the same pcgroup and have ndp > thresholdNDP, then remove m/z values higher than the precursor,
             * calculate relative intensity, and calculate neutral loss.
             */
            int n      = 0; // number of merges performed, printed at the end
            var pcgrps = mergedCIDList.GroupBy(x => int.Parse(x.group.Split('_')[2])); // third field of group is the pcgroup

            foreach (var specs in pcgrps)
            {
                List <SpectrumData> specList = specs.ToList();
                if (specList.Count > 1)  // merging only applies when the pcgroup holds more than one idMS/MS spectrum
                {
                    List <SpectrumData> similarList = new List <SpectrumData>();
                    specList = specList.OrderBy(x => double.Parse(x.group.Split('_')[0])).ToList(); // ascending precursor m/z: merge from the bottom up
                    for (int i = 0; i < specList.Count - 1; i++)
                    {
                        PeakData precursorPeak = allMSPeakList.Where(x => x.mz == double.Parse(specList[i].group.Split('_')[0])).ToList()[0]; // find this precursor in MS (exact m/z match)
                        if (!Data_Operation.IsPrecursor(precursorPeak.adduct))                                                                // only spectra whose adduct is not a precursor are merged upwards
                        {
                            for (int j = i + 1; j < specList.Count; j++)
                            {
                                double ndp = 0;
                                ndp = Cal_Cosine_Product.CalCosineProduct(specList[i], specList[j]);
                                if (ndp > ndpThreshold)
                                {
                                    List <SpectrumData> candiList = new List <SpectrumData>();
                                    candiList.Add(specList[i]);
                                    candiList.Add(specList[j]);
                                    SpectrumData MeSpec = MergeSpectraToOne(candiList);
                                    specList[j] = MeSpec; // the merged result replaces the higher-m/z spectrum ...
                                    specList.RemoveAt(i); // ... and the lower-m/z spectrum is removed
                                    i--;                  // re-examine the element that moved into position i
                                    n++;
                                    break;
                                }
                            }
                        }
                    }
                }
                for (int i = 0; i < specList.Count; i++)
                {
                    SpectrumData removeSpec = RemoveMZHigherThanPrecursor(specList[i]); // drop peaks above the precursor m/z
                    if (removeSpec.peakList.Count != 0)
                    {
                        SpectrumData finalSpec = CalRelativeIntensity(removeSpec); // recompute relative intensities
                        finalSpec.peakList        = finalSpec.peakList.OrderByDescending(x => x.mz).ToList();
                        finalSpec.neutralLossList = Data_Operation.FindNeutralLoss(finalSpec);
                        mergedSpecList.Add(finalSpec);
                    }
                }
            }
            Console.WriteLine("n:" + n);
            Console.WriteLine("merge done!");
            return(mergedSpecList);
        }