/// <summary>
/// Draws box plots of the imputed metabolite values per phenotype into one PDF file.
/// For every distinct custom metabolite ID, one plot is accumulated in R for each
/// charge/tissue combination (an empty placeholder from the R function <c>nullPlot</c>
/// when the metabolite is absent for that combination); the accumulated plots are then
/// printed with the R code returned by <c>printBoxPlotGrid</c>.
/// </summary>
/// <param name="outputFile">Path of the PDF file; backslashes are replaced by forward slashes before the path is passed to R.</param>
/// <param name="phenotypes">Phenotype names. Their count parameterises the generated R plotting function, and every pair of phenotypes (earlier index first) is looked up in the metabolite's pairwise test p-values.</param>
/// <remarks>
/// Console output is suppressed while the method runs and is always restored afterwards,
/// including when an exception escapes.
/// </remarks>
public static void printMyBoxplot(string outputFile, List<string> phenotypes)
{
    // Block REngine from printing to the console by swapping in a sink that discards
    // everything (nothing is buffered, nothing needs disposing).
    TextWriter stdOut = Console.Out;
    Console.SetOut(TextWriter.Null);
    try
    {
        // Define the R function that draws a single box plot; the text-annotation layers
        // in the middle are generated on the C# side from the number of phenotypes.
        rEngineInstance.engine.Evaluate(
            @"boxplotTissueCharge <- function(phenoMetabVals, plotTitle, plotYlabel, signifSymb, signifSymbYlevel, signifSymbXlevel) { p <- ggplot(data=phenoMetabVals) + theme_bw(base_size=18) + ggtitle(plotTitle) + ylab(plotYlabel) + geom_boxplot(aes(factor(phenoMetabVals[,1]), phenoMetabVals[,2])) +"
            + geom_textLinesInGgplot2Function(phenotypes.Count)
            + @"theme(legend.position=""none"", axis.title.x = element_blank()); return (p); };");

        var samples = metaboliteLevels.List_SampleForTissueAndCharge;

        // The sorted, distinct charges and tissues do not change while plotting, so they
        // are computed once instead of on every loop iteration.
        List<string> charges = samples.Select(x => x.Charge).Distinct().OrderBy(x => x).ToList();
        List<string> tissues = samples.Select(x => x.Tissue).Distinct().OrderBy(x => x).ToList();

        // The last charge and the first tissue are handed to the title/label helper,
        // which uses them to decide when tissue and charge names are printed.
        string lastCharge = charges.Last();
        string firstTissue = tissues.First();

        // Open the PDF device.
        // NOTE(review): if an exception escapes below, this R device is left open;
        // only the console redirection is undone by the finally block.
        rEngineInstance.engine.Evaluate(@"pdf(file=""" + outputFile.Replace("\\", "/") + @""", width=14, height=9)");

        // Loop over the custom metabolite IDs in sorted order.
        foreach (string mtblid in samples.SelectMany(x => x.ListOfMetabolites).Select(x => x.mtbltDetails.In_customId).Distinct().OrderBy(x => x))
        {
            // Details of the first metabolite record carrying this custom ID.
            msMetabolite mtbl = samples.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == mtblid).mtbltDetails;

            // Start a fresh list of plots for this metabolite.
            rEngineInstance.engine.Evaluate("accumulateBoxplots <- list()");

            foreach (string charge in charges)
            {
                foreach (string tissue in tissues)
                {
                    // Set the plot title and y-axis label.
                    rEngineInstance.engine.SetSymbol("boxPlotTitleAndYlabel", returnTitleAndYlabelCharacterVector(charge, lastCharge, tissue, firstTissue));

                    // Samples of this tissue/charge whose phenotype is not excluded.
                    var includedSamples = samples
                        .Where(x => x.Tissue == tissue && x.Charge == charge && !publicVariables.excludedPhenotypes.Contains(x.Phenotype))
                        .ToList();

                    bool detected = includedSamples
                        .SelectMany(x => x.ListOfMetabolites)
                        .Any(x => x.mtbltDetails.In_customId == mtbl.In_customId);

                    if (!detected)
                    {
                        // Metabolite not detected for this tissue/charge: add an empty plot
                        // (nullPlot is defined in the initialization of rEngineInstance).
                        rEngineInstance.engine.Evaluate("accumulateBoxplots[[length(accumulateBoxplots) + 1]] <- nullPlot(boxPlotTitleAndYlabel[1], boxPlotTitleAndYlabel[2])");
                        continue;
                    }

                    // (phenotype, imputed value) pairs to plot, and (label, p-value) pairs
                    // used for the significance symbols and their placement.
                    var phenoMetabValPairs = new List<Tuple<string, double>>();
                    var pValPairs = new List<Tuple<string, double>>();

                    foreach (sampleForTissueAndCharge sftac in includedSamples)
                    {
                        foreach (sampleForTissueAndCharge.parentMetabolite tdm in sftac.ListOfMetabolites.Where(x => x.mtbltDetails.In_customId == mtbl.In_customId))
                        {
                            phenoMetabValPairs.Add(new Tuple<string, double>(sftac.Phenotype, tdm.mtbltVals.Imputed));

                            // The p-values are taken from the first matching record only.
                            if (pValPairs.Count == 0)
                            {
                                for (int i = 0; i < phenotypes.Count; i++)
                                {
                                    for (int j = i + 1; j < phenotypes.Count; j++)
                                    {
                                        // Label is "<first letter of i>v<first letter of j>".
                                        pValPairs.Add(new Tuple<string, double>(
                                            phenotypes[i].First() + "v" + phenotypes[j].First(),
                                            tdm.mtbltDetails.ListOfStats.PairwiseTestPvalue.First(x => x.group1 == phenotypes[i] && x.group2 == phenotypes[j]).pairValue));
                                    }
                                }

                                // With more than two classes the multi-group p-value is appended
                                // under an empty label.
                                if (publicVariables.numberOfClasses != publicVariables.numberOfClassesValues.two)
                                {
                                    pValPairs.Add(new Tuple<string, double>("", tdm.mtbltDetails.ListOfStats.MultiGroupPvalue));
                                }
                            }
                        }
                    }

                    // Significance symbols matrix.
                    rEngineInstance.engine.SetSymbol("signifSymbYlevel", returnSignificanceDataFrame(phenoMetabValPairs, pValPairs));

                    // Data frame variable df in R.
                    rEngineInstance.engine.SetSymbol("df", returnIEnurable(phenoMetabValPairs));

                    // Build the plot on a throw-away null device so nothing is drawn yet,
                    // append it to the list of plots, then close the null device again.
                    rEngineInstance.engine.Evaluate(@"pdf(NULL); accumulateBoxplots[[length(accumulateBoxplots) + 1]] <- boxplotTissueCharge(df, boxPlotTitleAndYlabel[1], boxPlotTitleAndYlabel[2], signifSymbYlevel[,1], signifSymbYlevel[,2], signifSymbYlevel[,3]); dev.off();");
                }
            }

            // Print the accumulated plots and overlay the "<customId>_<name>" caption.
            rEngineInstance.engine.Evaluate(
                printBoxPlotGrid(charges.Count, tissues.Count)
                + @"p <- grid.text(""" + mtbl.In_customId + @"_" + mtbl.In_Name + @""", x=unit(130,""mm""), y=unit(225,""mm""), just=c(""left"", ""top""), gp = gpar(fontface=""bold"", fontsize=24, col=""blue"")); print(p);");
        }

        // Close the PDF device.
        rEngineInstance.engine.Evaluate(@"dev.off()");
    }
    finally
    {
        // Re-enable console printing no matter how the method exits.
        Console.SetOut(stdOut);
    }
}
/// <summary>
/// Draws scatter plots of imputed metabolite values against clinical data values.
/// One PDF file is written per non-categorical clinical variable found on the first
/// sample; within each file, for every distinct custom metabolite ID, one plot is
/// accumulated in R for each charge/tissue combination (an empty placeholder from the
/// R function <c>nullPlot</c> when the metabolite is absent for that combination) and
/// the accumulated plots are then printed with the R code returned by
/// <c>printBoxPlotGrid</c>.
/// </summary>
/// <param name="outputFile">Path prefix of the PDF files; backslashes are replaced by forward slashes, then the clinical variable name and ".pdf" are appended.</param>
/// <remarks>
/// Console output is suppressed while the method runs and is always restored afterwards,
/// including when an exception escapes.
/// </remarks>
public static void printMyScatterplot(string outputFile)
{
    // Block REngine from printing to the console by swapping in a sink that discards
    // everything (nothing is buffered, nothing needs disposing).
    TextWriter stdOut = Console.Out;
    Console.SetOut(TextWriter.Null);
    try
    {
        // Define the R function that draws a single scatter plot with its regression
        // line and the correlation label.
        rEngineInstance.engine.Evaluate(
            @"scatterplotTissueCharge <- function(df, plotTitle, plotYlabel, coords, rcorrLabel) { corrLine <- coef(lm(df$metab_vals ~ df$clindt_vals)); p <- ggplot(df) + theme_bw(base_size = 18) + ggtitle(paste(plotTitle)) + ylab(plotYlabel) + geom_point(aes(x = df$clindt_vals, y = df$metab_vals, color = factor(df$pheno))) + geom_abline(intercept = corrLine[1], slope = corrLine[2]) + geom_text(data = NULL, x = coords[1], y = coords[2], label = rcorrLabel) + theme(legend.position = ""none"", axis.title.x = element_blank()); return (p); };");

        var samples = metaboliteLevels.List_SampleForTissueAndCharge;

        // The sorted, distinct charges and tissues do not change while plotting, so they
        // are computed once instead of on every loop iteration.
        List<string> charges = samples.Select(x => x.Charge).Distinct().OrderBy(x => x).ToList();
        List<string> tissues = samples.Select(x => x.Tissue).Distinct().OrderBy(x => x).ToList();

        // The last charge and the first tissue are handed to the title/label helper,
        // which uses them to decide when tissue and charge names are printed.
        string lastCharge = charges.Last();
        string firstTissue = tissues.First();

        // The clinical variables are enumerated from the first sample.
        foreach (sampleForTissueAndCharge.sampleClinicalData sClinData in samples.First().ListOfNumClinicalData)
        {
            // Categorical clinical data cannot be plotted on a numeric axis.
            if (sClinData.typeOf == sampleForTissueAndCharge.sampleClinicalData.type.categorical)
            {
                continue;
            }

            // Open the PDF device for this clinical variable.
            // NOTE(review): if an exception escapes below, this R device is left open;
            // only the console redirection is undone by the finally block.
            rEngineInstance.engine.Evaluate(@"pdf(file=""" + outputFile.Replace("\\", "/") + sClinData.name + @".pdf"", width=14, height=9)");

            // Loop over the custom metabolite IDs in sorted order.
            foreach (string mtblid in samples.SelectMany(x => x.ListOfMetabolites).Select(x => x.mtbltDetails.In_customId).Distinct().OrderBy(x => x))
            {
                // Details of the first metabolite record carrying this custom ID.
                msMetabolite mtbl = samples.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == mtblid).mtbltDetails;

                // Start a fresh list of scatter plots for this metabolite.
                rEngineInstance.engine.Evaluate("accumulateScatterplots <- list()");

                foreach (string charge in charges)
                {
                    foreach (string tissue in tissues)
                    {
                        // Set the plot title and y-axis label.
                        rEngineInstance.engine.SetSymbol("scatterPlotTitleAndYlabel", returnTitleAndYlabelCharacterVector(charge, lastCharge, tissue, firstTissue));

                        // Samples of this tissue/charge whose phenotype is not excluded.
                        var includedSamples = samples
                            .Where(x => x.Tissue == tissue && x.Charge == charge && !publicVariables.excludedPhenotypes.Contains(x.Phenotype))
                            .ToList();

                        bool detected = includedSamples
                            .SelectMany(x => x.ListOfMetabolites)
                            .Any(x => x.mtbltDetails.In_customId == mtbl.In_customId);

                        if (!detected)
                        {
                            // Metabolite not detected for this tissue/charge: add an empty plot
                            // (nullPlot is defined in the initialization of rEngineInstance).
                            rEngineInstance.engine.Evaluate("accumulateScatterplots[[length(accumulateScatterplots) + 1]] <- nullPlot(scatterPlotTitleAndYlabel[1], scatterPlotTitleAndYlabel[2])");
                            continue;
                        }

                        // One row per included sample: phenotype, imputed metabolite value and
                        // the value of the current clinical variable.
                        var phenoMetabValPairs = new List<rDataFrame>();
                        foreach (sampleForTissueAndCharge sftac in includedSamples)
                        {
                            phenoMetabValPairs.Add(new rDataFrame()
                            {
                                phenotype = sftac.Phenotype,
                                metabolite = sftac.ListOfMetabolites.First(x => x.mtbltDetails.In_customId == mtbl.In_customId).mtbltVals.Imputed,
                                clinical_data = sftac.ListOfNumClinicalData.First(x => x.name == sClinData.name).n_value
                            });
                        }

                        // Correlation value (rounded to two decimals) and unadjusted p-value
                        // between metabolite and clinical variable, read from the first
                        // included sample.
                        Tuple<double, double> corrPval = includedSamples.First()
                            .ListOfMetabolites.First(x => x.mtbltDetails.In_customId == mtbl.In_customId)
                            .mtbltDetails.ListOfStats.CorrelationValues
                            .Where(x => x.clinical_data_name == sClinData.name)
                            .Select(x => new Tuple<double, double>(Math.Round(x.corr_value, 2), x.pValueUnadjust))
                            .First();

                        // Correlation label and its coordinates in the plot.
                        corrValCoords _corrValCoord = returnSignificanceDataFrame(phenoMetabValPairs, corrPval);
                        rEngineInstance.engine.SetSymbol("xCorrCoord", _corrValCoord.xCoord);
                        rEngineInstance.engine.SetSymbol("yCorrCoord", _corrValCoord.yCoord);
                        rEngineInstance.engine.Evaluate("coords <- c(xCorrCoord, yCorrCoord)");
                        rEngineInstance.engine.SetSymbol("rcorrLabel", _corrValCoord.label);

                        // Build the data frame variable df in R from three column vectors.
                        CharacterVector pheno = rEngineInstance.engine.CreateCharacterVector(phenoMetabValPairs.Select(x => x.phenotype).ToArray());
                        rEngineInstance.engine.SetSymbol("pheno", pheno);
                        NumericVector clindt_vals = rEngineInstance.engine.CreateNumericVector(phenoMetabValPairs.Select(x => x.clinical_data).ToArray());
                        rEngineInstance.engine.SetSymbol("clindt_vals", clindt_vals);
                        NumericVector metab_vals = rEngineInstance.engine.CreateNumericVector(phenoMetabValPairs.Select(x => x.metabolite).ToArray());
                        rEngineInstance.engine.SetSymbol("metab_vals", metab_vals);
                        rEngineInstance.engine.Evaluate("df <- cbind.data.frame(metab_vals, clindt_vals, pheno)");

                        // Build the plot on a throw-away null device so nothing is drawn yet,
                        // append it to the list of plots, then close the null device again.
                        rEngineInstance.engine.Evaluate(@"pdf(NULL); accumulateScatterplots[[length(accumulateScatterplots) + 1]] <- scatterplotTissueCharge(df, scatterPlotTitleAndYlabel[1], scatterPlotTitleAndYlabel[2], coords, rcorrLabel); dev.off();");
                    }
                }

                // Print the accumulated plots and overlay the axis caption
                // "x-<clinical variable> | y-<customId>_<name>".
                rEngineInstance.engine.Evaluate(
                    printBoxPlotGrid(charges.Count, tissues.Count)
                    + @"p <- grid.text(""x-" + sClinData.name + @" | y-" + mtbl.In_customId + @"_" + mtbl.In_Name + @""", x=unit(10,""mm""), y=unit(225,""mm""), just=c(""left"", ""top""), gp = gpar(fontface=""bold"", fontsize=24, col=""blue"")); print(p);");
            }

            // Close the PDF device of this clinical variable.
            rEngineInstance.engine.Evaluate(@"dev.off()");
        }
    }
    finally
    {
        // Re-enable console printing no matter how the method exits.
        Console.SetOut(stdOut);
    }
}
/// <summary>
/// Builds <c>msMetabolite</c> records from the column-oriented header of a CSV input:
/// each of the first 29 lines holds one attribute (name, type, formula, mass, ...),
/// and every column from <c>publicVariables.indexToStartFrom</c> onwards describes one entry.
/// </summary>
/// <param name="listOfCsvLines">Lines of the CSV input; fields are separated by <c>publicVariables.breakCharInFile</c>.</param>
/// <returns>
/// The entries that were kept, in column order. Entries of type "IS" are never returned.
/// Entries of type "Metabolite" with a non-blank custom ID are resolved against
/// <c>listOfMetaboliteIDs</c>: a repeat within this input gets a numeric suffix
/// ("_1", then highest existing suffix + 1).
/// </returns>
/// <remarks>
/// Side effect: IDs of newly accepted metabolites are appended to <c>listOfMetaboliteIDs</c>.
/// </remarks>
private static List<msMetabolite> ReadInputMetabolitesFromDatabase(List<string> listOfCsvLines)
{
    // Rows 0..28 of the input carry the metabolite attributes.
    const int headerRowCount = 29;

    // Split every header line exactly once. Previously the first line was re-split for
    // each loop-condition check and all 29 lines were re-split for every column.
    List<string[]> headerRows = listOfCsvLines
        .Take(headerRowCount)
        .Select(line => line.Split(publicVariables.breakCharInFile))
        .ToList();
    int columnCount = headerRows.First().Length;

    // ElementAt keeps the ArgumentOutOfRangeException behaviour for missing rows or
    // ragged columns.
    Func<int, int, string> cell = (row, column) => headerRows.ElementAt(row).ElementAt(column);

    List<msMetabolite> listOfMetabolitesPerTissueAndCharge = new List<msMetabolite>();

    // NOTE(review): these flags are deliberately NOT reset per column, to preserve the
    // existing behaviour. A column that is neither a "Metabolite" with a non-blank custom
    // ID nor an "IS" inherits the decision made for the previous column - confirm whether
    // that is intended.
    bool addToList = false, isDuplicate = false;

    for (int i = publicVariables.indexToStartFrom; i < columnCount; i++)
    {
        // NOTE(review): Convert.ToDouble/ToBoolean parse with the current culture;
        // confirm the input files always match the culture the tool runs under.
        msMetabolite msMetab = new msMetabolite()
        {
            In_Index = i,
            In_Name = cell(0, i).Trim(),
            In_Type = cell(1, i).Trim(),
            In_Formula = cell(2, i).Replace(" ", ""),
            In_Mass = Convert.ToDouble(cell(3, i)),
            In_Rt = Convert.ToDouble(cell(4, i)),
            In_customId = cell(5, i),
            In_Cas_id = cell(6, i),
            In_add_Cas_id = cell(7, i).Split('|').ToList(),
            In_Hmdb_id = cell(8, i),
            In_add_Hmdb_id = cell(9, i).Split('|').ToList(),
            In_Kegg_id = cell(10, i),
            In_add_Kegg_id = cell(11, i).Split('|').ToList(),
            In_Chebi_id = cell(12, i),
            In_add_Chebi_id = cell(13, i).Split('|').ToList(),
            In_Pubchem_id = cell(14, i),
            In_add_Pubchem_id = cell(15, i).Split('|').ToList(),
            In_Chemspider_id = cell(16, i),
            In_add_Chemspider_id = cell(17, i).Split('|').ToList(),
            In_Lipidmaps_id = cell(18, i),
            In_add_Lipidmaps_id = cell(19, i).Split('|').ToList(),
            In_Metlin_id = cell(20, i),
            In_add_Metlin_id = cell(21, i).Split('|').ToList(),
            In_isProblematic = Convert.ToBoolean(cell(22, i)),
            In_msMsConfirmed = Convert.ToBoolean(cell(23, i)),
            In_inBlank = Convert.ToBoolean(cell(24, i)),
            In_msProblematic = Convert.ToBoolean(cell(25, i)),
            In_AZmSuperClass = cell(26, i),
            In_AZmClass = cell(27, i),
            In_AZmNameFixed = cell(28, i)
        };

        // IsNullOrWhiteSpace already covers null and empty strings.
        if (!string.IsNullOrWhiteSpace(msMetab.In_customId) && msMetab.In_Type == "Metabolite")
        {
            // IDs are compared on the part before the first '_' (the suffix-free base ID).
            if (listOfMetaboliteIDs.Any(x => x.Split('_').First() == msMetab.In_customId))
            {
                if (listOfMetabolitesPerTissueAndCharge.Any(x => x.In_customId.Split('_').First() == msMetab.In_customId))
                {
                    // Same base ID already seen in this input: reuse the first such entry...
                    msMetab.ToHMDB_metabolite(listOfMetabolitesPerTissueAndCharge.First(x => x.In_customId.Split('_').First() == msMetab.In_customId));

                    // ...and give this entry a unique suffix. The ID is read after the call
                    // above, exactly as before, in case that call touches it.
                    string baseId = msMetab.In_customId;
                    List<string> siblingIds = listOfMetabolitesPerTissueAndCharge
                        .Select(x => x.In_customId)
                        .Where(id => id.Split('_').First() == baseId)
                        .ToList();

                    if (siblingIds.Count == 1)
                    {
                        // First repeat of this ID.
                        msMetab.In_customId = baseId + "_1";
                    }
                    else
                    {
                        // Further repeats continue after the highest suffix used so far.
                        int highestSuffix = siblingIds
                            .Where(id => id.Split('_').Length > 1)
                            .Select(id => Convert.ToInt32(id.Split('_').Last()))
                            .Max();
                        msMetab.In_customId = baseId + "_" + Convert.ToString(highestSuffix + 1);
                    }
                    addToList = true;
                }
                else
                {
                    // Known ID that has not occurred in this input yet: take the details from
                    // the already loaded samples and do not register the ID a second time.
                    msMetab.ToHMDB_metabolite(List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == msMetab.In_customId).mtbltDetails);
                    addToList = false;
                }
                isDuplicate = false;
            }
            else
            {
                // ID not seen before: fill in the details and register the ID.
                msMetab.getFromMetaboliteDB();
                addToList = true;
                isDuplicate = false;
            }
        }
        else if (msMetab.In_Type == "IS")
        {
            // Entries of type "IS" are neither registered nor returned.
            addToList = false;
            isDuplicate = true;
        }

        if (addToList)
        {
            listOfMetaboliteIDs.Add(msMetab.In_customId);
        }
        if (!isDuplicate)
        {
            listOfMetabolitesPerTissueAndCharge.Add(msMetab);
        }
    }

    return listOfMetabolitesPerTissueAndCharge;
}