/// <summary>
        /// Writes a PDF of boxplots to <paramref name="outputFile"/>: for every custom metabolite ID one
        /// plot is accumulated per (charge, tissue) combination and then handed to the R snippet returned
        /// by printBoxPlotGrid. While the plots are produced, Console output is suppressed; the original
        /// Console writer is always restored, even when plotting throws.
        /// </summary>
        /// <param name="outputFile">Path of the PDF to create (backslashes are converted to forward slashes for R).</param>
        /// <param name="phenotypes">Phenotype names; every pair (i, j) with i &lt; j is looked up in the pairwise test p-values.</param>
        public static void printMyBoxplot(string outputFile, List <string> phenotypes)
        {
            //block (intercept) REngine from printing to the Console
            //TextWriter.Null discards the output instead of buffering it in an unused StringWriter
            var stdOut = Console.Out;

            Console.SetOut(TextWriter.Null);

            try
            {
                renderBoxplotPdf(outputFile, phenotypes);
            }
            finally
            {
                //Re-enable Console printings, also when the rendering failed
                Console.SetOut(stdOut);
            }
        }

        /// <summary>
        /// Does the actual boxplot rendering for <see cref="printMyBoxplot"/>; expects the caller to have
        /// taken care of the Console redirection.
        /// </summary>
        private static void renderBoxplotPdf(string outputFile, List <string> phenotypes)
        {
            //print boxplot function
            rEngineInstance.engine.Evaluate(
                @"boxplotTissueCharge <- function(phenoMetabVals, plotTitle, plotYlabel, signifSymb, signifSymbYlevel, signifSymbXlevel) {
                    p <- ggplot(data=phenoMetabVals) + 
                                theme_bw(base_size=18) + 
                                ggtitle(plotTitle) +
                                ylab(plotYlabel) +
                                geom_boxplot(aes(factor(phenoMetabVals[,1]), phenoMetabVals[,2])) +" +
                geom_textLinesInGgplot2Function(phenotypes.Count) +
                @"theme(legend.position=""none"",
                                        axis.title.x = element_blank());
                    return (p);
                };");

            //distinct, sorted charges and tissues; computed once because they are reused by every loop below
            //(assumes the sample list is not modified while plotting)
            var charges = metaboliteLevels.List_SampleForTissueAndCharge.Select(x => x.Charge).Distinct().OrderBy(x => x).ToList();
            var tissues = metaboliteLevels.List_SampleForTissueAndCharge.Select(x => x.Tissue).Distinct().OrderBy(x => x).ToList();

            //keep track of the last charge value so that we know when to print the tissue names
            string lastCharge = charges.Last();
            //keep track of the first tissue value so that we know when to print the charge names
            string firstTissue = tissues.First();

            //open the pdf stream
            rEngineInstance.engine.Evaluate(@"pdf(file=""" + outputFile.Replace("\\", "/") + @""", width=14, height=9)");

            //assisting list of tuples
            List <Tuple <string, double> > phenoMetabValPairs, pValPairs;

            //loop over custom metabolite IDs
            foreach (string mtblid in metaboliteLevels.List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).Select(x => x.mtbltDetails.In_customId).Distinct().OrderBy(x => x))
            {
                //details of the first metabolite that carries the mtblid custom metabolite ID
                msMetabolite mtbl = metaboliteLevels.List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == mtblid).mtbltDetails;
                //initialize the list that will store the boxplots
                rEngineInstance.engine.Evaluate("accumulateBoxplots <- list()");

                //loop over charges
                foreach (string charge in charges)
                {
                    //loop over tissues
                    foreach (string tissue in tissues)
                    {
                        //set the plot title
                        rEngineInstance.engine.SetSymbol("boxPlotTitleAndYlabel", returnTitleAndYlabelCharacterVector(charge, lastCharge, tissue, firstTissue));

                        //samples of this tissue and charge whose phenotype is not excluded
                        var samples = metaboliteLevels.List_SampleForTissueAndCharge
                                      .Where(x => x.Tissue == tissue && x.Charge == charge && !publicVariables.excludedPhenotypes.Contains(x.Phenotype))
                                      .ToList();

                        //if the metabolite has been detected for the given combination of tissue and charge then do the plot
                        if (samples.SelectMany(x => x.ListOfMetabolites).Any(x => x.mtbltDetails.In_customId == mtbl.In_customId))
                        {
                            //initialize the assisting variable
                            phenoMetabValPairs = new List <Tuple <string, double> >();
                            pValPairs          = new List <Tuple <string, double> >();

                            //loop over all the metabolites for tissue and charge and non-ignored phenotypes
                            //in order to fill in the assisting variables
                            //these variables serve for significance symbols in the plot and for where to place them in the plot
                            foreach (sampleForTissueAndCharge sftac in samples)
                            {
                                foreach (sampleForTissueAndCharge.parentMetabolite tdm in sftac.ListOfMetabolites.Where(x => x.mtbltDetails.In_customId == mtbl.In_customId))
                                {
                                    phenoMetabValPairs.Add(new Tuple <string, double>(sftac.Phenotype, tdm.mtbltVals.Imputed));

                                    //the p-values are taken only once, from the first metabolite entry encountered
                                    if (pValPairs.Count == 0)
                                    {
                                        for (int i = 0; i < phenotypes.Count; i++)
                                        {
                                            for (int j = (i + 1); j < phenotypes.Count; j++)
                                            {
                                                //label is "<first char of phenotype i>v<first char of phenotype j>"
                                                pValPairs.Add(new Tuple <string, double>(phenotypes[i].First() + "v" + phenotypes[j].First(),
                                                                                         tdm.mtbltDetails.ListOfStats.PairwiseTestPvalue.First(x => x.group1 == phenotypes[i] && x.group2 == phenotypes[j]).pairValue));
                                            }
                                        }

                                        //with more than two classes the multi-group p-value is appended with an empty label
                                        if (publicVariables.numberOfClasses != publicVariables.numberOfClassesValues.two)
                                        {
                                            pValPairs.Add(new Tuple <string, double>("", tdm.mtbltDetails.ListOfStats.MultiGroupPvalue));
                                        }
                                    }
                                }
                            }

                            //significance symbols matrix
                            rEngineInstance.engine.SetSymbol("signifSymbYlevel", returnSignificanceDataFrame(phenoMetabValPairs, pValPairs));

                            //sets the dataframe variable df in R
                            rEngineInstance.engine.SetSymbol("df", returnIEnurable(phenoMetabValPairs));

                            //open a null device so that nothing is drawn while the plot object is built,
                            //add the boxplot to the list of plots,
                            //then close the null device again
                            rEngineInstance.engine.Evaluate(@"pdf(NULL); 
                                                            accumulateBoxplots[[length(accumulateBoxplots) + 1]] <- 
                                                                boxplotTissueCharge(df, boxPlotTitleAndYlabel[1], boxPlotTitleAndYlabel[2], signifSymbYlevel[,1], signifSymbYlevel[,2], signifSymbYlevel[,3]);
                                                            dev.off();");
                        }
                        else //if the metabolite has not been detected for this combination of tissue and charge provide an empty plot
                        {
                            //empty plot: nullPlot is not defined here (per the original note it comes from the initialization of rEngineInstance)
                            rEngineInstance.engine.Evaluate("accumulateBoxplots[[length(accumulateBoxplots) + 1]] <- nullPlot(boxPlotTitleAndYlabel[1], boxPlotTitleAndYlabel[2])");
                        }
                    }
                }
                //do the plot
                rEngineInstance.engine.Evaluate(@"" + printBoxPlotGrid(charges.Count, tissues.Count) +
                                                @"p <- grid.text(""" + mtbl.In_customId + @"_" + mtbl.In_Name + @""", x=unit(130,""mm""), y=unit(225,""mm""), just=c(""left"", ""top""), gp = gpar(fontface=""bold"", fontsize=24, col=""blue""));
                    print(p);");
            }
            rEngineInstance.engine.Evaluate(@"dev.off()");
        }
Beispiel #2
0
        /// <summary>
        /// Writes one PDF per non-categorical clinical variable (file name: <paramref name="outputFile"/> +
        /// variable name + ".pdf"). For every custom metabolite ID one scatterplot of metabolite value
        /// against the clinical value is accumulated per (charge, tissue) combination and then handed to the
        /// R snippet returned by printBoxPlotGrid. While the plots are produced, Console output is
        /// suppressed; the original Console writer is always restored, even when plotting throws.
        /// </summary>
        /// <param name="outputFile">Path prefix of the PDFs to create (backslashes are converted to forward slashes for R).</param>
        public static void printMyScatterplot(string outputFile)
        {
            //block (intercept) REngine from printing to the Console
            //TextWriter.Null discards the output instead of buffering it in an unused StringWriter
            var stdOut = Console.Out;

            Console.SetOut(TextWriter.Null);

            try
            {
                renderScatterplotPdfs(outputFile);
            }
            finally
            {
                //Re-enable Console printings, also when the rendering failed
                Console.SetOut(stdOut);
            }
        }

        /// <summary>
        /// Does the actual scatterplot rendering for <see cref="printMyScatterplot"/>; expects the caller to
        /// have taken care of the Console redirection.
        /// </summary>
        private static void renderScatterplotPdfs(string outputFile)
        {
            rEngineInstance.engine.Evaluate(
                @"scatterplotTissueCharge <- function(df, plotTitle, plotYlabel, coords, rcorrLabel) {

                    corrLine <- coef(lm(df$metab_vals ~ df$clindt_vals));

                    p <- ggplot(df) +
                                theme_bw(base_size = 18) +
                                ggtitle(paste(plotTitle)) +
                                ylab(plotYlabel) +
                                geom_point(aes(x = df$clindt_vals, y = df$metab_vals, color = factor(df$pheno))) +
                                geom_abline(intercept = corrLine[1], slope = corrLine[2]) +
                                geom_text(data = NULL, x = coords[1], y = coords[2], label = rcorrLabel) +
                                theme(legend.position = ""none"",
                                        axis.title.x = element_blank());
                    return (p);
                };");

            //distinct, sorted charges and tissues; computed once because they are reused by every loop below
            //(assumes the sample list is not modified while plotting)
            var charges = metaboliteLevels.List_SampleForTissueAndCharge.Select(x => x.Charge).Distinct().OrderBy(x => x).ToList();
            var tissues = metaboliteLevels.List_SampleForTissueAndCharge.Select(x => x.Tissue).Distinct().OrderBy(x => x).ToList();

            //keep track of the last charge value so that we know when to print the tissue names
            string lastCharge = charges.Last();
            //keep track of the first tissue value so that we know when to print the charge names
            string firstTissue = tissues.First();

            //assisting variables
            List <rDataFrame>      phenoMetabValPairs;
            Tuple <double, double> corrPval;
            corrValCoords          _corrValCoord;

            //the clinical variables are enumerated from the first sample only
            foreach (sampleForTissueAndCharge.sampleClinicalData sClinData in metaboliteLevels.List_SampleForTissueAndCharge.First().ListOfNumClinicalData)
            {
                //categorical clinical data are not plotted
                if (sClinData.typeOf == sampleForTissueAndCharge.sampleClinicalData.type.categorical)
                {
                    continue;
                }

                //open the pdf stream
                rEngineInstance.engine.Evaluate(@"pdf(file=""" + outputFile.Replace("\\", "/") + sClinData.name + @".pdf"", width=14, height=9)");

                //loop over custom metabolite IDs
                foreach (string mtblid in metaboliteLevels.List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).Select(x => x.mtbltDetails.In_customId).Distinct().OrderBy(x => x))
                {
                    //details of the first metabolite that carries the mtblid custom metabolite ID
                    msMetabolite mtbl = metaboliteLevels.List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == mtblid).mtbltDetails;
                    //initialize the list that will store the scatterplots
                    rEngineInstance.engine.Evaluate("accumulateScatterplots <- list()");

                    //loop over charges
                    foreach (string charge in charges)
                    {
                        //loop over tissues
                        foreach (string tissue in tissues)
                        {
                            //set the plot title
                            rEngineInstance.engine.SetSymbol("scatterPlotTitleAndYlabel", returnTitleAndYlabelCharacterVector(charge, lastCharge, tissue, firstTissue));

                            //samples of this tissue and charge whose phenotype is not excluded
                            var samples = metaboliteLevels.List_SampleForTissueAndCharge
                                          .Where(x => x.Tissue == tissue && x.Charge == charge && !publicVariables.excludedPhenotypes.Contains(x.Phenotype))
                                          .ToList();

                            //if the metabolite has been detected for the given combination of tissue and charge then do the plot
                            if (samples.SelectMany(x => x.ListOfMetabolites).Any(x => x.mtbltDetails.In_customId == mtbl.In_customId))
                            {
                                //initialize the assisting variable
                                phenoMetabValPairs = new List <rDataFrame>();

                                //one row per sample: phenotype, imputed metabolite value and the value of this clinical variable
                                //(First throws if a sample lacks the metabolite or the clinical variable)
                                foreach (sampleForTissueAndCharge sftac in samples)
                                {
                                    phenoMetabValPairs.Add(new rDataFrame()
                                    {
                                        phenotype     = sftac.Phenotype,
                                        metabolite    = sftac.ListOfMetabolites.First(x => x.mtbltDetails.In_customId == mtbl.In_customId).mtbltVals.Imputed,
                                        clinical_data = sftac.ListOfNumClinicalData.First(x => x.name == sClinData.name).n_value
                                    });
                                }

                                //retrieve correlation value (rounded to 2 decimals) and unadjusted p-value between metabolite and clinical data,
                                //taken from the first matching sample
                                corrPval = samples.First()
                                           .ListOfMetabolites.First(x => x.mtbltDetails.In_customId == mtbl.In_customId).mtbltDetails.ListOfStats.CorrelationValues.Where(x => x.clinical_data_name == sClinData.name)
                                           .Select(x => new Tuple <double, double>(Math.Round(x.corr_value, 2), x.pValueUnadjust)).First();

                                //significance symbols and coordinates
                                _corrValCoord = returnSignificanceDataFrame(phenoMetabValPairs, corrPval);
                                rEngineInstance.engine.SetSymbol("xCorrCoord", _corrValCoord.xCoord);
                                rEngineInstance.engine.SetSymbol("yCorrCoord", _corrValCoord.yCoord);
                                rEngineInstance.engine.Evaluate("coords <- c(xCorrCoord, yCorrCoord)");
                                rEngineInstance.engine.SetSymbol("rcorrLabel", _corrValCoord.label);

                                //sets the dataframe variable df in R
                                CharacterVector pheno = rEngineInstance.engine.CreateCharacterVector(phenoMetabValPairs.Select(x => x.phenotype).ToArray());
                                rEngineInstance.engine.SetSymbol("pheno", pheno);
                                NumericVector clindt_vals = rEngineInstance.engine.CreateNumericVector(phenoMetabValPairs.Select(x => x.clinical_data).ToArray());
                                rEngineInstance.engine.SetSymbol("clindt_vals", clindt_vals);
                                NumericVector metab_vals = rEngineInstance.engine.CreateNumericVector(phenoMetabValPairs.Select(x => x.metabolite).ToArray());
                                rEngineInstance.engine.SetSymbol("metab_vals", metab_vals);
                                rEngineInstance.engine.Evaluate("df <- cbind.data.frame(metab_vals, clindt_vals, pheno)");

                                //open a null device so that nothing is drawn while the plot object is built,
                                //add the scatterplot to the list of plots,
                                //then close the null device again
                                rEngineInstance.engine.Evaluate(@"pdf(NULL); 
                                                            accumulateScatterplots[[length(accumulateScatterplots) + 1]] <- 
                                                                scatterplotTissueCharge(df, scatterPlotTitleAndYlabel[1], scatterPlotTitleAndYlabel[2], coords, rcorrLabel);
                                                            dev.off();");
                            }
                            else //if the metabolite has not been detected for this combination of tissue and charge provide an empty plot
                            {
                                //empty plot: nullPlot is not defined here (per the original note it comes from the initialization of rEngineInstance)
                                rEngineInstance.engine.Evaluate("accumulateScatterplots[[length(accumulateScatterplots) + 1]] <- nullPlot(scatterPlotTitleAndYlabel[1], scatterPlotTitleAndYlabel[2])");
                            }
                        }
                    }
                    //do the plot
                    rEngineInstance.engine.Evaluate(@"" + printBoxPlotGrid(charges.Count, tissues.Count) +
                                                    @"p <- grid.text(""x-" + sClinData.name + @" | y-" + mtbl.In_customId + @"_" + mtbl.In_Name + @""", x=unit(10,""mm""), y=unit(225,""mm""), just=c(""left"", ""top""), gp = gpar(fontface=""bold"", fontsize=24, col=""blue""));
                    print(p);");
                }
                rEngineInstance.engine.Evaluate(@"dev.off()");
            }
        }
Beispiel #3
0
        /// <summary>
        /// Builds one msMetabolite per column of a column-oriented input: the first 29 lines
        /// (indexes 0 to 28) hold, per column, the name, type, formula, mass, retention time, custom ID,
        /// the external database IDs and a few flags/classes. Columns are read starting at
        /// publicVariables.indexToStartFrom.
        /// </summary>
        /// <remarks>
        /// Per column:
        /// - type "Metabolite" with a non-blank custom ID that is already known in listOfMetaboliteIDs:
        ///   the details are copied via ToHMDB_metabolite; if the ID already occurs in this input, the
        ///   custom ID gets a numeric suffix ("_1", "_2", ...) and is registered in listOfMetaboliteIDs;
        /// - type "Metabolite" with a non-blank custom ID that is not known yet: getFromMetaboliteDB is
        ///   called and the ID is registered;
        /// - type "IS": the column is skipped;
        /// - anything else: the metabolite is returned as read and its ID is not registered.
        /// </remarks>
        /// <param name="listOfCsvLines">Lines of the input file; cells are separated by publicVariables.breakCharInFile.</param>
        /// <returns>The metabolites read from the columns, in column order, without the "IS" columns.</returns>
        private static List <msMetabolite> ReadInputMetabolitesFromDatabase(List <string> listOfCsvLines)
        {
            //number of leading lines that carry per-column annotations (line indexes 0 to 28)
            const int annotationRowCount = 29;

            List <msMetabolite> listOfMetabolitesPerTissueAndCharge = new List <msMetabolite>();
            msMetabolite        msMetab;
            bool addToList, isDuplicate;

            //every annotation line is split at most once, on first use, instead of once per column;
            //splitting lazily keeps the order in which malformed input is reported
            string[][] splitRows = new string[annotationRowCount][];
            Func <int, int, string> cell = (row, column) =>
            {
                if (splitRows[row] == null)
                {
                    splitRows[row] = listOfCsvLines.ElementAt(row).Split(publicVariables.breakCharInFile);
                }
                return splitRows[row].ElementAt(column);
            };

            //the header line defines how many columns there are
            int columnCount = listOfCsvLines.First().Split(publicVariables.breakCharInFile).Length;

            for (int i = publicVariables.indexToStartFrom; i < columnCount; i++)
            {
                //reset the decisions for every column; otherwise a column that matches none of the
                //branches below would silently inherit the decision taken for the previous column
                addToList   = false;
                isDuplicate = false;

                //NOTE(review): Convert.ToDouble parses with the current culture; confirm that the
                //decimal separator of the input files matches the culture the tool runs under
                msMetab = new msMetabolite()
                {
                    In_Index             = i,
                    In_Name              = cell(0, i).Trim(),
                    In_Type              = cell(1, i).Trim(),
                    In_Formula           = cell(2, i).Replace(" ", ""),
                    In_Mass              = Convert.ToDouble(cell(3, i)),
                    In_Rt                = Convert.ToDouble(cell(4, i)),
                    In_customId          = cell(5, i),
                    In_Cas_id            = cell(6, i),
                    In_add_Cas_id        = cell(7, i).Split('|').ToList(),
                    In_Hmdb_id           = cell(8, i),
                    In_add_Hmdb_id       = cell(9, i).Split('|').ToList(),
                    In_Kegg_id           = cell(10, i),
                    In_add_Kegg_id       = cell(11, i).Split('|').ToList(),
                    In_Chebi_id          = cell(12, i),
                    In_add_Chebi_id      = cell(13, i).Split('|').ToList(),
                    In_Pubchem_id        = cell(14, i),
                    In_add_Pubchem_id    = cell(15, i).Split('|').ToList(),
                    In_Chemspider_id     = cell(16, i),
                    In_add_Chemspider_id = cell(17, i).Split('|').ToList(),
                    In_Lipidmaps_id      = cell(18, i),
                    In_add_Lipidmaps_id  = cell(19, i).Split('|').ToList(),
                    In_Metlin_id         = cell(20, i),
                    In_add_Metlin_id     = cell(21, i).Split('|').ToList(),
                    In_isProblematic     = Convert.ToBoolean(cell(22, i)),
                    In_msMsConfirmed     = Convert.ToBoolean(cell(23, i)),
                    In_inBlank           = Convert.ToBoolean(cell(24, i)),
                    In_msProblematic     = Convert.ToBoolean(cell(25, i)),
                    In_AZmSuperClass     = cell(26, i),
                    In_AZmClass          = cell(27, i),
                    In_AZmNameFixed      = cell(28, i)
                };

                if (!string.IsNullOrWhiteSpace(msMetab.In_customId) && msMetab.In_Type == "Metabolite")
                {
                    //IDs are compared on the part before the first '_' (the part without the numeric suffix)
                    if (listOfMetaboliteIDs.Any(x => x.Split('_').First() == msMetab.In_customId))
                    {
                        if (listOfMetabolitesPerTissueAndCharge.Any(x => x.In_customId.Split('_').First() == msMetab.In_customId))
                        {
                            //the same custom ID already occurred in this input: reuse the details of its first occurrence
                            msMetab.ToHMDB_metabolite(listOfMetabolitesPerTissueAndCharge.First(x => x.In_customId.Split('_').First() == msMetab.In_customId));
                            if (listOfMetabolitesPerTissueAndCharge.Count(x => x.In_customId.Split('_').First() == msMetab.In_customId) == 1)
                            {
                                //first repetition
                                msMetab.In_customId = msMetab.In_customId + "_1";
                            }
                            else
                            {
                                //further repetitions: continue after the highest suffix used so far
                                msMetab.In_customId = msMetab.In_customId + "_" + Convert.ToString(listOfMetabolitesPerTissueAndCharge.Where(x => x.In_customId.Split('_').First() == msMetab.In_customId)
                                                                                                   .Select(x => x.In_customId).Where(x => x.Split('_').Length > 1).Select(x => Convert.ToInt32(x.Split('_').Last())).Max() + 1);
                            }
                            addToList = true;
                        }
                        else
                        {
                            //known from previously read samples: reuse those details, the ID is already registered
                            msMetab.ToHMDB_metabolite(List_SampleForTissueAndCharge.SelectMany(x => x.ListOfMetabolites).First(x => x.mtbltDetails.In_customId == msMetab.In_customId).mtbltDetails);
                        }
                    }
                    else
                    {
                        //unknown ID: look the metabolite up and register its ID
                        msMetab.getFromMetaboliteDB();
                        addToList = true;
                    }
                }
                else if (msMetab.In_Type == "IS")
                {
                    //"IS" columns are neither registered nor returned
                    isDuplicate = true;
                }

                if (addToList)
                {
                    listOfMetaboliteIDs.Add(msMetab.In_customId);
                }

                if (!isDuplicate)
                {
                    listOfMetabolitesPerTissueAndCharge.Add(msMetab);
                }
            }

            return(listOfMetabolitesPerTissueAndCharge);
        }