/// <summary>
/// Replaces the zero intensities that are still present in an XCMS peak table after the
/// "fill peaks" step with an averaged background-noise value, and writes the result out.
/// </summary>
/// <remarks>
/// Two exports of the same peak table are required: one created with "fill peaks" and one
/// without. For every sample cell (column title containing '_') that is "NA" in the
/// non-filled table, the value of the same row and same-named column in the filled table is
/// collected. The mean of those values is used as the background noise and is written into
/// every sample cell of the filled table whose text is exactly "0".
/// Numbers are parsed and written with the invariant culture so that the decimal separator
/// can never collide with the CSV comma.
/// </remarks>
/// <param name="fillPeaksDataPath">CSV peak table exported after "fill peaks".</param>
/// <param name="noneFillPeaksDataPath">CSV peak table exported without "fill peaks".</param>
/// <param name="outputFilePath">Destination of the corrected peak table.</param>
/// <exception cref="System.FormatException">
/// A filled-table cell that corresponds to an "NA" cell is not a valid number.
/// </exception>
public static void Fill0Peaks(string fillPeaksDataPath, string noneFillPeaksDataPath, string outputFilePath)
{
System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

List<string> fillPeaksDataList = FileProcess.ReadFileToList(fillPeaksDataPath);
List<string> noneFillPeaksDataList = FileProcess.ReadFileToList(noneFillPeaksDataPath);

if (fillPeaksDataList.Count != noneFillPeaksDataList.Count)
{
    Console.WriteLine("Fillpeaks and noneFillpeaks data are not match the same length! check the original data!");
    return;
}

List<string[]> fill0ValueList = new List<string[]>();
double backgroundNoise = 0;

if (fillPeaksDataList.Count != 0)
{
    // Row 0 of both tables is the title row.
    string[] titleLineFill = fillPeaksDataList[0].Split(',');
    string[] titleLineNoneFill = noneFillPeaksDataList[0].Split(',');

    // Column name (quotes stripped) -> column index in the filled table. The first
    // occurrence wins, which matches a left-to-right search that stops at the first hit.
    Dictionary<string, int> fillColumnByName = new Dictionary<string, int>();
    for (int k = 0; k < titleLineFill.Length; k++)
    {
        string name = titleLineFill[k].Replace("\"", "");
        if (!fillColumnByName.ContainsKey(name))
        {
            fillColumnByName.Add(name, k);
        }
    }

    // Pass 1: average the filled values found at every "NA" position of the non-filled table.
    double sumBackgroundNoise = 0;
    int naCount = 0;
    for (int i = 1; i < noneFillPeaksDataList.Count; i++)
    {
        string[] noneFillRow = noneFillPeaksDataList[i].Split(',');
        string[] fillRow = fillPeaksDataList[i].Split(',');

        // Cells beyond the title row have no column name and therefore cannot be sample cells.
        int columnCount = Math.Min(noneFillRow.Length, titleLineNoneFill.Length);
        for (int j = 0; j < columnCount; j++)
        {
            if (!titleLineNoneFill[j].Contains("_") || noneFillRow[j] != "NA")
            {
                continue;
            }

            int fillColumn;
            if (fillColumnByName.TryGetValue(titleLineNoneFill[j].Replace("\"", ""), out fillColumn)
                && fillColumn < fillRow.Length)
            {
                sumBackgroundNoise += double.Parse(fillRow[fillColumn], invariant);
                naCount++;
            }
        }
    }

    if (naCount != 0)
    {
        backgroundNoise = sumBackgroundNoise / naCount;
    }

    // Pass 2: copy the filled table (title row included) and overwrite "0" sample cells.
    string backgroundNoiseText = backgroundNoise.ToString(invariant);
    for (int i = 0; i < fillPeaksDataList.Count; i++)
    {
        string[] row = fillPeaksDataList[i].Split(',');
        int columnCount = Math.Min(row.Length, titleLineFill.Length);
        for (int j = 0; j < columnCount; j++)
        {
            if (titleLineFill[j].Contains("_") && row[j] == "0")
            {
                row[j] = backgroundNoiseText;
            }
        }
        fill0ValueList.Add(row);
    }
}

Console.WriteLine("BackgroundNoise: " + backgroundNoise);
FileProcess.WritePeakList(fill0ValueList, outputFilePath);
}
/// <summary>
/// Reads a result CSV peak table generated by XCMS and CAMERA and converts it into a list
/// of spectra.
/// </summary>
/// <remarks>
/// Every data row is expanded into one PeakData per sample column (a column whose title
/// contains '_' and is longer than one character). The peaks of a row are then reduced to a
/// single peak by AverageIntensityofMZline, so each entry of the intermediate list stands
/// for one row of the table. Those peaks are grouped by their precursor text and each group
/// becomes one SpectrumData; no group is dropped, single-peak groups included.
/// Columns are located by title ("mz", "rt", "isotopes", "adduct", "pcgroup", "pval",
/// "feature", "diffrt", "precursor"); fixed fallback positions are used for titles that
/// are missing. The pval, feature and diffrt values are only read when a "diffrt" column
/// is found at an index other than 0.
/// Numbers are parsed with the invariant culture.
/// </remarks>
/// <param name="csvFilePath">Path of the CSV peak table.</param>
/// <returns>
/// The spectra ordered by group name; an empty list when the file contains no lines.
/// </returns>
public static List<SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath)
{
System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

List<SpectrumData> allSpectrumList = new List<SpectrumData>();
List<string> csvLines = FileProcess.ReadFileToList(csvFilePath);

if (csvLines.Count == 0)
{
    // Without a title row there is nothing to parse; return instead of dereferencing null.
    Console.WriteLine("CSVFile is empty!");
    return allSpectrumList;
}

string[] titleLine = csvLines[0].Split(',');

// Positions of the columns of interest, preset to fallback values that are used when
// the corresponding title is not present.
int diffrtIndex = 0, pvalIndex = 0, featureIndex = 0, mzIndex = 4, rtIndex = 5,
    isotopesIndex = titleLine.Length - 4,
    adductIndex = titleLine.Length - 3,
    pcgroupIndex = titleLine.Length - 2,
    precursorIndex = titleLine.Length - 1;

// Indices of the sample (intensity) columns, in ascending order.
List<int> sampleColumns = new List<int>();

for (int j = 0; j < titleLine.Length; j++)
{
    titleLine[j] = titleLine[j].Replace("\"", "");
    switch (titleLine[j])
    {
        case "mz": mzIndex = j; break;
        case "rt": rtIndex = j; break;
        case "isotopes": isotopesIndex = j; break;
        case "adduct": adductIndex = j; break;
        case "pcgroup": pcgroupIndex = j; break;
        case "pval": pvalIndex = j; break;
        case "feature": featureIndex = j; break;
        case "diffrt": diffrtIndex = j; break;
        case "precursor": precursorIndex = j; break;
        default: break;
    }

    // All sample names contain the '_' character.
    if (titleLine[j].Contains("_") && titleLine[j].Length > 1)
    {
        sampleColumns.Add(j);
    }
}

// Tables without a diffrt column carry no pval/feature/diffrt values.
bool hasDiffStatistics = diffrtIndex != 0;

// An intensity below this product is flagged as noise.
var noiseThreshold = weight * bg;

List<PeakData> allPeakList = new List<PeakData>();
for (int i = 1; i < csvLines.Count; i++)
{
    if (csvLines[i].Trim().Length == 0)
    {
        continue; // blank line (e.g. trailing newline) carries no peak
    }

    string[] line = csvLines[i].Replace("\"", "").Split(',');
    List<PeakData> mzPeaks = new List<PeakData>();

    if (sampleColumns.Count > 0 && sampleColumns[0] < line.Length)
    {
        // Values shared by every sample of this row are parsed once.
        double mz = double.Parse(line[mzIndex], invariant);
        double rt = double.Parse(line[rtIndex], invariant);
        int pcgroup = int.Parse(line[pcgroupIndex], invariant);
        string precursor = line[precursorIndex].Replace(" ", "");

        double pval = 0, feature = 0, diffrt = 0;
        if (hasDiffStatistics)
        {
            pval = double.Parse(line[pvalIndex], invariant);
            feature = double.Parse(line[featureIndex], invariant);
            diffrt = double.Parse(line[diffrtIndex], invariant);
        }

        foreach (int column in sampleColumns)
        {
            if (column >= line.Length)
            {
                break; // row is shorter than the title row; remaining samples are absent
            }

            double intensity = double.Parse(line[column], invariant);

            PeakData peak = new PeakData();
            peak.mz = mz;
            peak.rt = rt;
            peak.isotopes = line[isotopesIndex];
            peak.adduct = line[adductIndex];
            peak.pcgroup = pcgroup;
            peak.intensity = intensity;
            peak.sampleName = titleLine[column];
            if (hasDiffStatistics)
            {
                peak.pval = pval;
                peak.feature = feature;
                peak.diffrt = diffrt;
            }
            peak.precursor = precursor;
            // Written as a negated >= so that a NaN intensity is flagged as noise.
            peak.isNoise = !(intensity >= noiseThreshold);
            mzPeaks.Add(peak);
        }
    }

    // Collapses the per-sample peaks of this row into a single peak.
    PeakData averPeak = AverageIntensityofMZline(mzPeaks);
    allPeakList.Add(averPeak);
}

// One spectrum per precursor. GroupBy never yields an empty group, so every group is kept.
foreach (var precursorGroup in allPeakList.GroupBy(p => p.precursor))
{
    SpectrumData spec = new SpectrumData();
    spec.peakList = precursorGroup.ToList();
    spec.group = precursorGroup.Key.Replace(" ", "");
    allSpectrumList.Add(spec);
}

return allSpectrumList.OrderBy(x => x.group).ToList();
}