/// <summary>
/// Builds the row-against-row Pearson correlation matrix of a numeric CSV file.
/// </summary>
/// <param name="readfilePath">Path of the CSV file read through FileProcess.ReadCSV; every field of every row is parsed as a number.</param>
/// <param name="outpath">Not used by this method; kept so that existing callers keep compiling.</param>
/// <returns>
/// One array per input row. Element j of array i is the value returned by
/// Cal_Pearson_Correlation.CalPearsonCorrelation for input rows i and j.
/// </returns>
public static List<double[]> HeatMapPearsonCorrelation(string readfilePath, string outpath)
{
    List<string[]> meanList = FileProcess.ReadCSV(readfilePath);

    // Parse every row exactly once. The previous version re-parsed the comparison row for
    // every (i, j) pair, i.e. n * n row parses instead of n.
    // CSV content is machine-readable, so parse with the invariant culture instead of the
    // culture of the machine the program happens to run on.
    List<double[]> rows = new List<double[]>(meanList.Count);
    foreach (string[] fields in meanList)
    {
        double[] values = new double[fields.Length];
        for (int m = 0; m < fields.Length; m++)
        {
            values[m] = double.Parse(fields[m], System.Globalization.CultureInfo.InvariantCulture);
        }
        rows.Add(values);
    }

    // NOTE(review): the parsed arrays are shared between calls, which assumes that
    // CalPearsonCorrelation does not modify its arguments (the previous version already
    // reused the outer row across all inner iterations) - confirm.
    List<double[]> pclist = new List<double[]>(rows.Count);
    for (int i = 0; i < rows.Count; i++)
    {
        double[] pcline = new double[rows.Count];
        for (int j = 0; j < rows.Count; j++)
        {
            pcline[j] = Cal_Pearson_Correlation.CalPearsonCorrelation(rows[i], rows[j]);
        }
        pclist.Add(pcline);
    }

    return pclist;
}
/// <summary>
/// Replaces the "0" intensities that remain in an XCMS peak table with an estimated background noise level.
/// </summary>
/// <remarks>
/// Even after the XCMS "fill peaks" function some intensities are still 0. Two peak tables are compared:
/// one exported with "fill peaks" and one without. For every sample cell that is "NA" in the non-filled
/// table, the value of the column with the same title in the filled table is collected; the mean of the
/// collected values is the background noise. Every sample cell of the filled table that equals "0" is then
/// replaced by that mean and the resulting table is written to <paramref name="outputFilePath"/>.
/// Sample columns are recognised by an underscore in their title. (An earlier version selected titles
/// starting with "X" and not containing "X." instead.)
/// </remarks>
/// <param name="fillPeaksDataPath">Peak table exported with "fill peaks".</param>
/// <param name="noneFillPeaksDataPath">Peak table of the same data exported without "fill peaks".</param>
/// <param name="outputFilePath">Destination passed to FileProcess.WritePeakList.</param>
public static void Fill0Peaks(string fillPeaksDataPath, string noneFillPeaksDataPath, string outputFilePath)
{
    List<string> fillPeaksDataList = FileProcess.ReadFileToList(fillPeaksDataPath);
    List<string> noneFillPeaksDataList = FileProcess.ReadFileToList(noneFillPeaksDataPath);
    if (fillPeaksDataList.Count != noneFillPeaksDataList.Count)
    {
        Console.WriteLine("Fillpeaks and noneFillpeaks data are not match the same length! check the original data!");
        return;
    }

    // Peak tables are machine-readable: parse and format numbers culture-independently so that
    // a decimal-comma locale can neither break parsing nor corrupt the written CSV.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    System.Globalization.NumberStyles numberStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    // Title rows (empty arrays when the files contain no lines at all).
    string[] titleLine_fill = new string[0];
    string[] titleLine_nonefill = new string[0];
    if (fillPeaksDataList.Count != 0)
    {
        titleLine_fill = fillPeaksDataList[0].Split(',');
        titleLine_nonefill = noneFillPeaksDataList[0].Split(',');
    }

    // Unquoted title -> first column index in the filled table. This replaces a linear search
    // per "NA" cell that was bounded by the width of the non-filled row, which missed matching
    // columns (or ran past the filled title row) whenever the two tables differ in width.
    Dictionary<string, int> fillColumnByTitle = new Dictionary<string, int>();
    for (int k = 0; k < titleLine_fill.Length; k++)
    {
        string title = titleLine_fill[k].Replace("\"", "");
        if (!fillColumnByTitle.ContainsKey(title))
        {
            fillColumnByTitle.Add(title, k);
        }
    }

    // Pass 1: average the filled values of all cells that are "NA" in the non-filled table.
    double sumBackgroundNoise = 0;
    int naCount = 0;
    for (int i = 1; i < noneFillPeaksDataList.Count; i++)
    {
        string[] noneFillRow = noneFillPeaksDataList[i].Split(',');
        string[] fillRow = null; // split lazily, only for rows that actually contain an "NA"
        int columnCount = Math.Min(noneFillRow.Length, titleLine_nonefill.Length); // tolerate ragged rows
        for (int j = 0; j < columnCount; j++)
        {
            if (!titleLine_nonefill[j].Contains("_") || noneFillRow[j] != "NA")
            {
                continue;
            }

            int fillColumn;
            if (!fillColumnByTitle.TryGetValue(titleLine_nonefill[j].Replace("\"", ""), out fillColumn))
            {
                continue; // the filled table has no column with this title
            }

            if (fillRow == null)
            {
                fillRow = fillPeaksDataList[i].Split(',');
            }

            // Cells that are missing or not numeric in the filled table are left out of the mean.
            double filledValue;
            if (fillColumn < fillRow.Length
                && double.TryParse(fillRow[fillColumn], numberStyle, invariant, out filledValue))
            {
                sumBackgroundNoise += filledValue;
                naCount++;
            }
        }
    }

    double backgroundNoise = naCount != 0 ? sumBackgroundNoise / naCount : 0;
    string backgroundNoiseText = backgroundNoise.ToString(invariant);

    // Pass 2: copy the filled table, replacing "0" in sample columns. The loop starts at the
    // title row on purpose so that the title row is part of the written output.
    List<string[]> fill0ValueList = new List<string[]>(fillPeaksDataList.Count);
    for (int i = 0; i < fillPeaksDataList.Count; i++)
    {
        string[] row = fillPeaksDataList[i].Split(',');
        int columnCount = Math.Min(row.Length, titleLine_fill.Length); // tolerate ragged rows
        for (int j = 0; j < columnCount; j++)
        {
            if (titleLine_fill[j].Contains("_") && row[j] == "0")
            {
                row[j] = backgroundNoiseText;
            }
        }
        fill0ValueList.Add(row);
    }

    Console.WriteLine("BackgroundNoise: " + backgroundNoise);
    FileProcess.WritePeakList(fill0ValueList, outputFilePath);
}
/// <summary>
/// Console entry point. Assembles idMS/MS spectra from the deconvoluted spectra table, writes the
/// spectra and the neutral-loss tables, then computes and writes the pairwise NDP (cosine product)
/// matrix of all merged spectra and prints the run parameters and the elapsed time.
/// </summary>
/// <param name="args">
/// Optional. A non-empty first argument is used as the data folder; without it the built-in
/// folder is used, exactly as before.
/// </param>
static void Main(string[] args)
{
    Stopwatch watch = new Stopwatch();
    watch.Start();

    // Data folder. All file names below are appended directly, so it must end with a separator.
    string rootPath = @"E:\Different tissues\idMSMS different tissue\MIX Results\";
    if (args != null && args.Length > 0 && args[0].Length > 0)
    {
        rootPath = args[0];
        char lastChar = rootPath[rootPath.Length - 1];
        if (lastChar != '\\' && lastChar != '/')
        {
            rootPath += System.IO.Path.DirectorySeparatorChar;
        }
    }

    /**
     * Fill 0 values in XCMS peaktable
     */
    //string fillpeaksPath = rootPath + "NormPeakArea.csv";
    //string nonefillpeaksPath = rootPath + "data_minfrac0.1_sn3_nonfill.csv";
    //string outPath = rootPath + "NormPeakArea_fill0.csv";
    //Data_Operation.Fill0Peaks(fillpeaksPath, nonefillpeaksPath, outPath);

    /**
     * export precursors (change parameters to export all/without isotope/only adducts)
     */
    //string CSVFilePath = rootPath + "NormPeakArea.csv";
    //List<SpectrumData> allSpectraList = IdMSMS_Deconvolution.ProcessPeaktableToSpectralist(CSVFilePath);
    //Data_Operation.FindPrecursorMZ(allSpectraList, outPrecursorPath);

    /**
     * Assemble idMS/MS with multi-eV
     */
    string CSVFilePath = rootPath + "AllDeconvolutedSpectra_MS_pvals_9s.csv";
    string outputFilePath = rootPath + "idmsms_3_6_0.9_minCor_rmv50_50ev.csv";
    string NLOutputFilePath = rootPath + "NL_motif.csv";
    string NDPmatrixPath = rootPath + "NDPmatrix.csv";

    List<SpectrumData> allMergedList = IdMSMS_Deconvolution.MergeIdMSMSOfDiffCIDs(CSVFilePath);
    FileProcess.WriteSpectrumList(allMergedList, outputFilePath);     // output idMSMS spectra
    FileProcess.OutputNeutralLoss(allMergedList, NLOutputFilePath);   // output NL_motif

    string commonNLPath = rootPath + "Common NL table.csv";
    string outNLHitPath = rootPath + "NLmatrix.csv";
    FileProcess.OutputSelectedNLto360Spec(allMergedList, commonNLPath, outNLHitPath); // output binary NL hit table

    //List<SpectrumData> allSpectraList = FileProcess.ReadSpectralCSVFile(CSVFilePath); // use if there is spectra list to read
    //List<SpectrumData> allKnownSpectraList = FileProcess.ReadKnownMSMSSpectraList(knownMSMSPath); // use if there is known spectra list

    // Row and column labels of the NDP matrix: the group of every merged spectrum.
    // NOTE(review): the previous version also aliased the merged list as a "known spectra" list
    // and built a second label array from peakList[0].sampleName. That array was never passed
    // to the writer (the query labels are used for both axes) and indexing peakList[0] could
    // only add a failure mode, so it was removed. Reinstate it when comparing against a
    // separate library of known spectra.
    string[] queryTitleLine = new string[allMergedList.Count];
    for (int i = 0; i < allMergedList.Count; i++)
    {
        queryTitleLine[i] = allMergedList[i].group.ToString();
    }

    // Cosine product of every merged spectrum against every merged spectrum.
    List<double[]> NDPResultList = new List<double[]>(allMergedList.Count);
    for (int i = 0; i < allMergedList.Count; i++)
    {
        double[] line = new double[allMergedList.Count];
        for (int j = 0; j < allMergedList.Count; j++)
        {
            line[j] = Cal_Cosine_Product.CalCosineProduct(allMergedList[i], allMergedList[j]);
        }
        NDPResultList.Add(line);
    }
    FileProcess.WriteNDPListToFile(NDPResultList, queryTitleLine, queryTitleLine, NDPmatrixPath); // output NDPmatrix

    watch.Stop();
    Console.WriteLine("weight: " + IdMSMS_Deconvolution.weight + " minSamp:" + IdMSMS_Deconvolution.minSampleNrForCor + " ndpThreh:" + IdMSMS_Deconvolution.ndpThreshold);
    Console.WriteLine("Deconvoluted spectra number: " + allMergedList.Count);
    Console.WriteLine("calculation time: " + watch.Elapsed);
    Console.WriteLine("OK");
    Console.Read(); // keep the console window open until a key is pressed
}
/// <summary>
/// Reads a result CSV file generated by XCMS and CAMERA and converts it into a list of spectra,
/// one spectrum per distinct precursor value.
/// </summary>
/// <remarks>
/// Every data row (one m/z) is expanded into one PeakData per sample column, reduced to a single
/// PeakData by AverageIntensityofMZline, and the resulting peaks are grouped by their precursor
/// field. Sample columns are the columns whose title contains an underscore.
/// </remarks>
/// <param name="csvFilePath">Path of the peak table, read through FileProcess.ReadFileToList.</param>
/// <returns>The spectra ordered by group; an empty list when the file contains no lines.</returns>
public static List<SpectrumData> ProcessPeaktableToSpectralist(string csvFilePath)
{
    List<SpectrumData> allSpectrumList = new List<SpectrumData>();
    List<string> CSVList = FileProcess.ReadFileToList(csvFilePath);
    if (CSVList.Count == 0)
    {
        // The previous version printed this message and then dereferenced the missing title row.
        Console.WriteLine("CSVFile is empty!");
        return allSpectrumList;
    }

    // Peak tables are machine-readable, so numbers are parsed culture-independently.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string[] titleLine = CSVList[0].Split(',');

    // Column positions of the annotated fields. The initial values are the fallbacks used when a
    // title is not present: mz/rt at fixed positions, the annotation columns at the end of the row.
    int pvalIndex = 0, featureIndex = 0, diffrtIndex = 0;
    int mzIndex = 4, rtIndex = 5;
    int isotopesIndex = titleLine.Length - 4;
    int adductIndex = titleLine.Length - 3;
    int pcgroupIndex = titleLine.Length - 2;
    int precursorIndex = titleLine.Length - 1;
    bool hasDiffrt = false; // MS data does not contain pval, feature, diffrt
    List<int> sampleColumns = new List<int>();

    for (int j = 0; j < titleLine.Length; j++)
    {
        titleLine[j] = titleLine[j].Replace("\"", "");
        switch (titleLine[j])
        {
            case "mz": mzIndex = j; break;
            case "rt": rtIndex = j; break;
            case "isotopes": isotopesIndex = j; break;
            case "adduct": adductIndex = j; break;
            case "pcgroup": pcgroupIndex = j; break;
            case "pval": pvalIndex = j; break;
            case "feature": featureIndex = j; break;
            case "precursor": precursorIndex = j; break;
            case "diffrt":
                diffrtIndex = j;
                hasDiffrt = true; // explicit flag instead of treating index 0 as "absent"
                break;
            default: break;
        }

        // all the sample names contain "_" char
        if (titleLine[j].Contains("_") && titleLine[j].Length > 1)
        {
            sampleColumns.Add(j);
        }
    }

    // One averaged peak per data row.
    List<PeakData> allPeakList = new List<PeakData>();
    for (int i = 1; i < CSVList.Count; i++)
    {
        string[] line = CSVList[i].Replace("\"", "").Split(',');
        List<PeakData> mzPeaks = new List<PeakData>();

        // Row-level fields are identical for every sample column. They are parsed once, and only
        // when the row really has a sample cell, so short or blank rows are handled as before.
        bool rowParsed = false;
        double mz = 0, rt = 0, pval = 0, feature = 0, diffrt = 0;
        int pcgroup = 0;
        string precursor = null;

        foreach (int j in sampleColumns)
        {
            if (j >= line.Length)
            {
                break; // sampleColumns is ascending: the row has no further cells
            }

            if (!rowParsed)
            {
                mz = double.Parse(line[mzIndex], invariant);
                rt = double.Parse(line[rtIndex], invariant);
                pcgroup = int.Parse(line[pcgroupIndex], invariant);
                if (hasDiffrt)
                {
                    pval = double.Parse(line[pvalIndex], invariant);
                    feature = double.Parse(line[featureIndex], invariant);
                    diffrt = double.Parse(line[diffrtIndex], invariant);
                }
                precursor = line[precursorIndex].Replace(" ", "");
                rowParsed = true;
            }

            PeakData peak = new PeakData();
            peak.mz = mz;
            peak.rt = rt;
            peak.isotopes = line[isotopesIndex];
            peak.adduct = line[adductIndex];
            peak.pcgroup = pcgroup;
            peak.intensity = double.Parse(line[j], invariant);
            peak.sampleName = titleLine[j];
            if (hasDiffrt)
            {
                peak.pval = pval;
                peak.feature = feature;
                peak.diffrt = diffrt;
            }
            peak.precursor = precursor;

            // An intensity of at least weight * bg is deemed a real peak, everything else noise.
            peak.isNoise = !(peak.intensity >= weight * bg);
            mzPeaks.Add(peak);
        }

        // after AverageIntensityofMZline, only one peakdata is left per row
        allPeakList.Add(AverageIntensityofMZline(mzPeaks));
    }

    // Merge all peaks that share a precursor into one spectrum. A grouping always contains at
    // least one peak, so no emptiness check is needed.
    // NOTE(review): an older comment said single-peak groups "should be removed"; they are kept
    // here, exactly as the previous code did - confirm the intended behaviour.
    foreach (var precursorGroup in allPeakList.GroupBy(p => p.precursor))
    {
        SpectrumData spec = new SpectrumData();
        spec.peakList = precursorGroup.ToList(); // spec contains all original peak data
        spec.group = precursorGroup.Key.Replace(" ", "");
        allSpectrumList.Add(spec);
    }

    return allSpectrumList.OrderBy(x => x.group).ToList();
}