Exemple #1
0
        /// <summary>
        /// Builds the pairwise Pearson correlation matrix of the rows of a CSV file.
        /// </summary>
        /// <param name="readfilePath">Path handed to <c>FileProcess.ReadCSV</c>; every cell of the file is parsed as a number.</param>
        /// <param name="outpath">Not used by this method; kept so that existing callers keep compiling.</param>
        /// <returns>
        /// One array per input row; element <c>j</c> of array <c>i</c> is the value returned by
        /// <c>Cal_Pearson_Correlation.CalPearsonCorrelation</c> for row <c>i</c> and row <c>j</c>.
        /// </returns>
        /// <exception cref="System.FormatException">A cell is not a valid number.</exception>
        public static List <double[]> HeatMapPearsonCorrelation(string readfilePath, string outpath)
        {
            List <string[]> meanList = FileProcess.ReadCSV(readfilePath);

            // Parse every row exactly once instead of re-parsing it for each (i, j) pair.
            // The invariant culture is used because the cells come from a CSV file: the
            // result must not depend on the regional settings of the machine.
            List <double[]> rows = new List <double[]>(meanList.Count);
            foreach (string[] cells in meanList)
            {
                double[] values = new double[cells.Length];
                for (int c = 0; c < cells.Length; c++)
                {
                    values[c] = double.Parse(cells[c], System.Globalization.CultureInfo.InvariantCulture);
                }
                rows.Add(values);
            }

            // NOTE(review): the parsed arrays are shared between calls; this assumes
            // CalPearsonCorrelation does not modify its arguments - confirm.
            List <double[]> pclist = new List <double[]>(rows.Count);
            for (int i = 0; i < rows.Count; i++)
            {
                double[] pcline = new double[rows.Count];
                for (int j = 0; j < rows.Count; j++)
                {
                    pcline[j] = Cal_Pearson_Correlation.CalPearsonCorrelation(rows[i], rows[j]);
                }
                pclist.Add(pcline);
            }
            return(pclist);
        }
Exemple #2
0
        /* This method fills the 0 values left in an XCMS peak table (after the "fill peaks" function of XCMS, some of the intensities are still 0).
         *  This is done by generating 2 peak tables: file1 with the "fillpeaks" function and file2 without it. The file1 values of all areas that are "NA" in file2 are summed and averaged, and the average is assigned to the zero areas.
         */
        /// <summary>
        /// Replaces the "0" cells of the sample columns in a filled peak table by an estimated background noise
        /// and writes the result with <c>FileProcess.WritePeakList</c>.
        /// </summary>
        /// <remarks>
        /// The background noise is the mean of the filled-table values found at every position whose cell is
        /// "NA" in the non-filled table. Sample columns are the columns whose header contains '_'. Columns of
        /// the two tables are matched by header name (quotes ignored), rows are matched by position.
        /// When no "NA" position is found the background noise is 0.
        /// </remarks>
        /// <param name="fillPeaksDataPath">CSV peak table whose "0" cells are replaced.</param>
        /// <param name="noneFillPeaksDataPath">CSV peak table with the same number of lines that still contains "NA" cells.</param>
        /// <param name="outputFilePath">Destination passed to <c>FileProcess.WritePeakList</c>.</param>
        public static void Fill0Peaks(string fillPeaksDataPath, string noneFillPeaksDataPath, string outputFilePath)
        {
            List <string> fillPeaksDataList     = FileProcess.ReadFileToList(fillPeaksDataPath);
            List <string> noneFillPeaksDataList = FileProcess.ReadFileToList(noneFillPeaksDataPath);

            if (fillPeaksDataList.Count != noneFillPeaksDataList.Count)
            {
                Console.WriteLine("Fillpeaks and noneFillpeaks data are not match the same length! check the original data!");
                return;
            }

            // Numbers are read from / written to CSV, so they must not depend on regional settings
            // (a decimal comma written into the output would create an extra column).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] titleLine_fill     = new string[0];
            string[] titleLine_nonefill = new string[0];
            if (fillPeaksDataList.Count != 0)   // extract title row
            {
                titleLine_fill     = fillPeaksDataList[0].Split(',');
                titleLine_nonefill = noneFillPeaksDataList[0].Split(',');
            }

            // Header name -> first column with that name in the filled table. Looking the name up over the
            // whole filled header (instead of only the first "non-filled row length" columns) also finds
            // sample columns when the filled table has more columns than the non-filled one.
            Dictionary <string, int> fillColumnByName = new Dictionary <string, int>();
            for (int k = 0; k < titleLine_fill.Length; k++)
            {
                string name = titleLine_fill[k].Replace("\"", "");
                if (!fillColumnByName.ContainsKey(name))
                {
                    fillColumnByName.Add(name, k);
                }
            }

            double sumBackgroundNoise = 0;
            int    NAcount            = 0;

            for (int i = 1; i < noneFillPeaksDataList.Count; i++)
            {
                string[] noneFillLine = noneFillPeaksDataList[i].Split(',');
                string[] fillLine     = null; // split lazily, at most once per row
                for (int j = 0; j < noneFillLine.Length; j++)
                {
                    if (!titleLine_nonefill[j].Contains("_") || noneFillLine[j] != "NA")
                    {
                        continue;
                    }
                    int fillColumn;
                    if (!fillColumnByName.TryGetValue(titleLine_nonefill[j].Replace("\"", ""), out fillColumn))
                    {
                        continue; // sample is absent from the filled table
                    }
                    if (fillLine == null)
                    {
                        fillLine = fillPeaksDataList[i].Split(',');
                    }
                    sumBackgroundNoise += double.Parse(fillLine[fillColumn], invariant);
                    NAcount++;
                }
            }

            double backgroundNoise     = NAcount != 0 ? sumBackgroundNoise / NAcount : 0;
            string backgroundNoiseText = backgroundNoise.ToString(invariant);

            List <string[]> fill0ValueList = new List <string[]>(fillPeaksDataList.Count);
            for (int i = 0; i < fillPeaksDataList.Count; i++)   // starts at 0 so the title row is copied to the output
            {
                string[] line = fillPeaksDataList[i].Split(',');
                for (int j = 0; j < line.Length; j++)
                {
                    if (titleLine_fill[j].Contains("_") && line[j] == "0")
                    {
                        line[j] = backgroundNoiseText;
                    }
                }
                fill0ValueList.Add(line);
            }
            Console.WriteLine("BackgroundNoise: " + backgroundNoise);
            FileProcess.WritePeakList(fill0ValueList, outputFilePath);
        }
        /// <summary>
        /// Console entry point. Merges the idMS/MS spectra of the deconvoluted peak table found under a
        /// hard-coded root folder, writes the merged spectra, the neutral-loss outputs and the pairwise
        /// NDP matrix, then prints a few settings and the elapsed time and waits for a key.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            Stopwatch timer = new Stopwatch();
            timer.Start();

            string rootPath = @"E:\Different tissues\idMSMS different tissue\MIX Results\";

            // --- Optional step: fill 0 values in the XCMS peaktable -------------------------------
            //string fillpeaksPath = rootPath + "NormPeakArea.csv";
            //string nonefillpeaksPath = rootPath + "data_minfrac0.1_sn3_nonfill.csv";
            //string outPath = rootPath + "NormPeakArea_fill0.csv";
            //Data_Operation.Fill0Peaks(fillpeaksPath, nonefillpeaksPath, outPath);

            // --- Optional step: export precursors (change parameters to export all / without isotope / only adducts)
            //string CSVFilePath = rootPath + "NormPeakArea.csv";
            //List<SpectrumData> allSpectraList = IdMSMS_Deconvolution.ProcessPeaktableToSpectralist(CSVFilePath);
            //Data_Operation.FindPrecursorMZ(allSpectraList, outPrecursorPath);

            // --- Assemble idMS/MS with multi-eV ----------------------------------------------------
            string inputCsvPath       = rootPath + "AllDeconvolutedSpectra_MS_pvals_9s.csv";
            string spectraOutputPath  = rootPath + "idmsms_3_6_0.9_minCor_rmv50_50ev.csv";
            string neutralLossOutPath = rootPath + "NL_motif.csv";
            string ndpMatrixOutPath   = rootPath + "NDPmatrix.csv";

            List <SpectrumData> mergedSpectra = IdMSMS_Deconvolution.MergeIdMSMSOfDiffCIDs(inputCsvPath);

            FileProcess.WriteSpectrumList(mergedSpectra, spectraOutputPath);   // idMSMS spectra
            FileProcess.OutputNeutralLoss(mergedSpectra, neutralLossOutPath);  // NL_motif

            string commonNLPath = rootPath + "Common NL table.csv";
            string nlHitOutPath = rootPath + "NLmatrix.csv";
            FileProcess.OutputSelectedNLto360Spec(mergedSpectra, commonNLPath, nlHitOutPath); // binary NL hit table

            // Alternative inputs, enable when a spectra list / known spectra list should be read instead:
            //List<SpectrumData> allSpectraList = FileProcess.ReadSpectralCSVFile(CSVFilePath);
            //List<SpectrumData> allKnownSpectraList = FileProcess.ReadKnownMSMSSpectraList(knownMSMSPath);
            List <SpectrumData> knownSpectra = mergedSpectra;

            // Title of every query spectrum: its group.
            string[] queryTitles = mergedSpectra.ConvertAll <string>(spectrum => spectrum.group.ToString()).ToArray();

            // Title of every library spectrum: sample name of its first peak.
            // It is computed but not handed to the writer below, which gets the query titles twice.
            string[] libraryTitles = knownSpectra.ConvertAll <string>(spectrum => spectrum.peakList[0].sampleName).ToArray();

            // Pairwise cosine product of every merged spectrum against every merged spectrum.
            List <double[]> ndpMatrix = new List <double[]>();
            foreach (SpectrumData query in mergedSpectra)
            {
                double[] ndpRow = new double[mergedSpectra.Count];
                int      column = 0;
                foreach (SpectrumData candidate in mergedSpectra)
                {
                    ndpRow[column] = Cal_Cosine_Product.CalCosineProduct(query, candidate);
                    column++;
                }
                ndpMatrix.Add(ndpRow);
            }
            FileProcess.WriteNDPListToFile(ndpMatrix, queryTitles, queryTitles, ndpMatrixOutPath); // NDP matrix

            timer.Stop();

            string settingsSummary = "weight: " + IdMSMS_Deconvolution.weight
                                     + " minSamp:" + IdMSMS_Deconvolution.minSampleNrForCor
                                     + " ndpThreh:" + IdMSMS_Deconvolution.ndpThreshold;
            Console.WriteLine(settingsSummary);
            Console.WriteLine("Deconvoluted spectra number: " + mergedSpectra.Count);
            Console.WriteLine("calculation time: " + timer.Elapsed);
            Console.WriteLine("OK");
            Console.Read(); // keep the console window open
        }
        /**
         * After using this function, the list contains a <PeakData> list; each index in the list corresponds to one mz (value averaged)
         */
        /// <summary>
        /// Reads a peak table CSV and converts it into a list of spectra, one per precursor.
        /// </summary>
        /// <remarks>
        /// For every data row one <c>PeakData</c> is created per sample column (a column whose title contains '_'),
        /// the peaks of the row are reduced to a single peak by <c>AverageIntensityofMZline</c>, and the resulting
        /// peaks are grouped by their <c>precursor</c> value. A peak is flagged as noise when its intensity is
        /// below <c>weight * bg</c>. Columns are located by title ("mz", "rt", "isotopes", "adduct", "pcgroup",
        /// "pval", "feature", "diffrt", "precursor"); when a title is missing a positional default is used.
        /// </remarks>
        /// <param name="csvFilePath">Path handed to <c>FileProcess.ReadFileToList</c>.</param>
        /// <returns>The spectra ordered by <c>group</c>; an empty list when the file has no lines.</returns>
        /// <exception cref="System.FormatException">A numeric cell cannot be parsed.</exception>
        public static List <SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath)  //read result CSV file generated by XCMS and CAMERA to a list of spectra
        {
            List <SpectrumData> allSpectrumList = new List <SpectrumData>();
            List <string>       CSVList         = FileProcess.ReadFileToList(csvFilePath);
            //CSVList = BigFileReader.GetFileContent(csvFilePath);

            if (CSVList.Count == 0)
            {
                // Nothing to process: return instead of dereferencing a missing title row.
                Console.WriteLine("CSVFile is empty!");
                return(allSpectrumList);
            }

            // Cells come from a CSV file, so parsing must not depend on regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string[] titleLine = CSVList[0].Split(',');   // title row

            // Positional defaults, overwritten below when the corresponding title is found.
            int  pvalIndex = 0, featureIndex = 0, diffrtIndex = 0, mzIndex = 4, rtIndex = 5;
            int  isotopesIndex  = titleLine.Length - 4;
            int  adductIndex    = titleLine.Length - 3;
            int  pcgroupIndex   = titleLine.Length - 2;
            int  precursorIndex = titleLine.Length - 1;
            bool hasDiffrt      = false;   // explicit flag, so a "diffrt" column at index 0 is still recognised

            for (int j = 0; j < titleLine.Length; j++)
            {
                titleLine[j] = titleLine[j].Replace("\"", "");
                switch (titleLine[j])
                {
                case "mz":
                    mzIndex = j;
                    break;

                case "rt":
                    rtIndex = j;
                    break;

                case "isotopes":
                    isotopesIndex = j;
                    break;

                case "adduct":
                    adductIndex = j;
                    break;

                case "pcgroup":
                    pcgroupIndex = j;
                    break;

                case "pval":
                    pvalIndex = j;
                    break;

                case "feature":
                    featureIndex = j;
                    break;

                case "diffrt":
                    diffrtIndex = j;
                    hasDiffrt   = true;
                    break;

                case "precursor":
                    precursorIndex = j;
                    break;

                default:
                    break;
                }
            }

            List <PeakData> allPeakList = new List <PeakData>();
            for (int i = 1; i < CSVList.Count; i++)   // data rows; each row is read against the title row
            {
                string[]        line    = CSVList[i].Replace("\"", "").Split(',');
                List <PeakData> mzPeaks = new List <PeakData>();
                for (int j = 0; j < line.Length; j++)
                {
                    if (!titleLine[j].Contains("_") || titleLine[j].Length <= 1)   // all the sample names contain "_" char
                    {
                        continue;
                    }
                    PeakData peak = new PeakData();
                    peak.mz         = double.Parse(line[mzIndex], invariant);
                    peak.rt         = double.Parse(line[rtIndex], invariant);
                    peak.isotopes   = line[isotopesIndex];
                    peak.adduct     = line[adductIndex];
                    peak.pcgroup    = int.Parse(line[pcgroupIndex], invariant);
                    double intensity = double.Parse(line[j], invariant);
                    peak.intensity  = intensity;
                    peak.sampleName = titleLine[j];
                    if (hasDiffrt)  //MS doesnt contain pval, feature, diffrt
                    {
                        peak.pval    = double.Parse(line[pvalIndex], invariant);
                        peak.feature = double.Parse(line[featureIndex], invariant);
                        peak.diffrt  = double.Parse(line[diffrtIndex], invariant);
                    }
                    peak.precursor = line[precursorIndex].Replace(" ", "");
                    // An intensity of at least weight * bg is deemed a real peak, everything else is noise.
                    peak.isNoise = !(intensity >= weight * bg);
                    mzPeaks.Add(peak);
                }
                // after AverageIntensityofMZline, only one peakdata is left for the row
                allPeakList.Add(AverageIntensityofMZline(mzPeaks));
            }

            // One spectrum per precursor. GroupBy never yields an empty group, so every group is kept.
            foreach (var precursorGroup in allPeakList.GroupBy(p => p.precursor))
            {
                SpectrumData spec = new SpectrumData();
                spec.peakList = precursorGroup.ToList();   //spec contains all original peak data
                spec.group    = precursorGroup.Key.Replace(" ", "");
                allSpectrumList.Add(spec);                 //here merged all samples and CIDs into allSpectrumList
            }
            return(allSpectrumList.OrderBy(x => x.group).ToList());
        }