Example #1
0
        /* Fills the intensities that are still 0 in an XCMS peak table after the XCMS "fillPeaks" step.
         * Two peak tables are needed: one exported after "fillPeaks" and one exported without it. The filled-table values
         * found at every position that is "NA" in the non-filled table are averaged, and that average replaces each 0 intensity.
         */
        /*
         * Replaces every "0" cell in the sample columns of the filled peak table with an estimated
         * background noise value and writes the resulting rows through FileProcess.WritePeakList.
         *
         * The noise value is the mean of the filled-table cells located at each position that is "NA"
         * in the non-filled table (same data row, column matched by its quote-stripped title). It is 0
         * when no such position exists. Sample columns are the ones whose title contains "_".
         *
         * fillPeaksDataPath     : path of the comma-separated table that went through "fillPeaks".
         * noneFillPeaksDataPath : path of the comma-separated table exported without "fillPeaks".
         * outputFilePath        : destination handed to FileProcess.WritePeakList.
         *
         * When the two tables do not have the same number of lines a message is printed and nothing is written.
         */
        public static void Fill0Peaks(string fillPeaksDataPath, string noneFillPeaksDataPath, string outputFilePath)
        {
            // The tables are machine-written CSV: numbers must be read and written with '.' as the decimal
            // separator whatever the current culture is, otherwise a "1,5" written back would split the column.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            List <string[]> fill0ValueList        = new List <string[]>();
            List <string>   fillPeaksDataList     = FileProcess.ReadFileToList(fillPeaksDataPath);
            List <string>   noneFillPeaksDataList = FileProcess.ReadFileToList(noneFillPeaksDataPath);

            if (fillPeaksDataList.Count != noneFillPeaksDataList.Count)
            {
                Console.WriteLine("Fillpeaks and noneFillpeaks data are not match the same length! check the original data!");
                return;
            }

            // Empty title arrays keep the empty-input case on the normal path (nothing to scan, empty output).
            string[] titleLine_fill     = new string[0];
            string[] titleLine_nonefill = new string[0];
            if (fillPeaksDataList.Count != 0)   // extract title row
            {
                titleLine_fill     = fillPeaksDataList[0].Split(',');
                titleLine_nonefill = noneFillPeaksDataList[0].Split(',');
            }

            // Quote-stripped title -> first column index in the filled table, built once so that each
            // "NA" cell is resolved with a lookup instead of a scan over the whole title row.
            Dictionary <string, int> fillColumnByTitle = new Dictionary <string, int>();
            for (int k = 0; k < titleLine_fill.Length; k++)
            {
                string title = titleLine_fill[k].Replace("\"", "");
                if (!fillColumnByTitle.ContainsKey(title))
                {
                    fillColumnByTitle.Add(title, k);
                }
            }

            double sumBackgroundNoise = 0, BackgroundNoise = 0;
            int    NAcount = 0;

            for (int i = 1; i < noneFillPeaksDataList.Count; i++)
            {
                string[] noneFillLine = noneFillPeaksDataList[i].Split(',');
                string[] fillLine     = fillPeaksDataList[i].Split(',');

                // Cells beyond the title row have no column name, so they cannot be sample columns.
                int columnCount = Math.Min(noneFillLine.Length, titleLine_nonefill.Length);
                for (int j = 0; j < columnCount; j++)
                {
                    if (!titleLine_nonefill[j].Contains("_") || noneFillLine[j] != "NA")
                    {
                        continue;
                    }

                    int fillColumn;
                    string wantedTitle = titleLine_nonefill[j].Replace("\"", "");
                    if (fillColumnByTitle.TryGetValue(wantedTitle, out fillColumn) && fillColumn < fillLine.Length)
                    {
                        sumBackgroundNoise += double.Parse(fillLine[fillColumn], invariant);
                        NAcount++;
                    }
                }
            }

            if (NAcount != 0)
            {
                BackgroundNoise = sumBackgroundNoise / NAcount;
            }

            string backgroundNoiseText = BackgroundNoise.ToString(invariant);

            // The title row (i == 0) goes through the same loop; a title containing "_" can never equal "0",
            // so it is always copied unchanged.
            for (int i = 0; i < fillPeaksDataList.Count; i++)
            {
                string[] line        = fillPeaksDataList[i].Split(',');
                int      columnCount = Math.Min(line.Length, titleLine_fill.Length);
                for (int j = 0; j < columnCount; j++)
                {
                    if (titleLine_fill[j].Contains("_") && line[j] == "0")
                    {
                        line[j] = backgroundNoiseText;
                    }
                }
                fill0ValueList.Add(line);
            }

            Console.WriteLine("BackgroundNoise: " + BackgroundNoise);
            FileProcess.WritePeakList(fill0ValueList, outputFilePath);
        }
        /**
         * Returns a list of SpectrumData. Each data row of the CSV (one m/z) is reduced to a single averaged PeakData,
         * and the averaged peaks are then grouped into spectra by their precursor value.
         */
        /*
         * Reads a comma-separated peak table (result CSV generated by XCMS and CAMERA) and turns it into spectra.
         *
         * For every data row one PeakData is created per sample column (title containing "_"); the row's peaks
         * are reduced to one PeakData by AverageIntensityofMZline; the reduced peaks are grouped by precursor
         * into SpectrumData objects, which are returned ordered by their group key.
         *
         * csvFilePath : path handed to FileProcess.ReadFileToList.
         * returns     : the spectra, or an empty list (after printing "CSVFile is empty!") when the file has no lines.
         */
        public static List <SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath)  //read result CSV file generated by XCMS and CAMERA to a list of spectra
        {
            // Machine-written CSV: always parse numbers with '.' as decimal separator, independent of the current culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            List <SpectrumData> allSpectrumList = new List <SpectrumData>();
            List <PeakData>     allPeakList     = new List <PeakData>();
            List <string>       CSVList         = FileProcess.ReadFileToList(csvFilePath);

            if (CSVList.Count == 0)
            {
                // Without a title row there is nothing to index; return instead of dereferencing a null title array.
                Console.WriteLine("CSVFile is empty!");
                return(allSpectrumList);
            }

            string[] titleLine  = CSVList[0].Split(',');   // extract title row
            int      titleCount = titleLine.Length;

            // Fallback column positions, used for any title that is not found in the title row.
            int  diffrtIndex = 0, pvalIndex = 0, featureIndex = 0, mzIndex = 4, rtIndex = 5;
            int  isotopesIndex = titleCount - 4, adductIndex = titleCount - 3, pcgroupIndex = titleCount - 2, precursorIndex = titleCount - 1;
            bool hasDiffrt = false;   // explicit flag instead of reading "diffrtIndex != 0" as "column present"

            // Strip the quotes from each title and record where the columns of interest are.
            for (int j = 0; j < titleCount; j++)
            {
                titleLine[j] = titleLine[j].Replace("\"", "");
                switch (titleLine[j])
                {
                case "mz":
                    mzIndex = j;
                    break;

                case "rt":
                    rtIndex = j;
                    break;

                case "isotopes":
                    isotopesIndex = j;
                    break;

                case "adduct":
                    adductIndex = j;
                    break;

                case "pcgroup":
                    pcgroupIndex = j;
                    break;

                case "pval":
                    pvalIndex = j;
                    break;

                case "feature":
                    featureIndex = j;
                    break;

                case "diffrt":
                    diffrtIndex = j;
                    hasDiffrt   = true;
                    break;

                case "precursor":
                    precursorIndex = j;
                    break;

                default:
                    break;
                }
            }

            for (int i = 1; i < CSVList.Count; i++)   // one data row at a time, paired with the title row
            {
                string[]        line    = CSVList[i].Replace("\"", "").Split(',');
                List <PeakData> mzPeaks = new List <PeakData>();

                // Cells beyond the title row have no column name, so they cannot be sample columns.
                int columnCount = Math.Min(line.Length, titleCount);
                for (int j = 0; j < columnCount; j++)
                {
                    if (!titleLine[j].Contains("_") || titleLine[j].Length <= 1)   // all the sample names contain "_" char
                    {
                        continue;
                    }

                    double intensity = double.Parse(line[j], invariant);

                    PeakData peak = new PeakData();
                    peak.mz         = double.Parse(line[mzIndex], invariant);
                    peak.rt         = double.Parse(line[rtIndex], invariant);
                    peak.isotopes   = line[isotopesIndex];
                    peak.adduct     = line[adductIndex];
                    peak.pcgroup    = int.Parse(line[pcgroupIndex], invariant);
                    peak.intensity  = intensity;
                    peak.sampleName = titleLine[j];
                    if (hasDiffrt)  //MS doesnt contain pval, feature, diffrt
                    {
                        peak.pval    = double.Parse(line[pvalIndex], invariant);
                        peak.feature = double.Parse(line[featureIndex], invariant);
                        peak.diffrt  = double.Parse(line[diffrtIndex], invariant);
                    }
                    peak.precursor = line[precursorIndex].Replace(" ", "");

                    // A peak is real only when its intensity reaches weight * bg; written as a negated ">="
                    // so that a NaN intensity is still classified as noise.
                    peak.isNoise = !(intensity >= weight * bg);

                    mzPeaks.Add(peak);
                }

                // AverageIntensityofMZline reduces the row's per-sample peaks to a single PeakData.
                allPeakList.Add(AverageIntensityofMZline(mzPeaks));
            }

            // GroupBy never produces an empty group, so every group becomes a spectrum.
            foreach (var precursorGroup in allPeakList.GroupBy(p => p.precursor))
            {
                SpectrumData spec = new SpectrumData();
                spec.peakList = precursorGroup.ToList();   //spec contains all original peak data
                spec.group    = precursorGroup.Key.Replace(" ", "");
                allSpectrumList.Add(spec);                 //here merged all samples and CIDs into allSpectrumList
            }

            return(allSpectrumList.OrderBy(x => x.group).ToList());
        }