Ejemplo n.º 1
0
        /// <summary>
        /// Runs the global workflow on every file in <paramref name="rawDirectoryPath"/> whose name ends with
        /// ".raw", once against the target lipids and once against the decoy lipids, and writes
        /// "{dataset}_target.tsv" and "{dataset}_decoy.tsv" into the same directory as the raw file.
        /// </summary>
        /// <param name="rawDirectoryPath">Directory that is scanned (non-recursively) for raw files.</param>
        /// <param name="targetFilePath">Path of the target lipid list read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="decoyFilePath">Path of the decoy lipid list read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="hcdError">HCD error tolerance forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <param name="cidError">CID error tolerance forwarded to <c>RunGlobalWorkflow</c>.</param>
        public void RunTraining(string rawDirectoryPath, string targetFilePath, string decoyFilePath, double hcdError = 30, double cidError = 500)
        {
            // Load both lipid lists once; they are reused for every raw file.
            var targets = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(targetFilePath));
            var decoys = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(decoyFilePath));

            // Ordinal comparison: a file extension is an identifier, not linguistic text.
            var rawFilePaths = Directory.GetFiles(rawDirectoryPath)
                .Where(file => file.EndsWith(".raw", System.StringComparison.Ordinal));

            foreach (var rawFilePath in rawFilePaths)
            {
                // Build the output paths next to the raw file.
                var rawFileName = Path.GetFileName(rawFilePath);
                var datasetPath = Path.GetDirectoryName(rawFilePath);
                var datasetName = Path.GetFileNameWithoutExtension(rawFilePath);
                var targetResultsPath = Path.Combine(datasetPath, string.Format("{0}_target.tsv", datasetName));
                var decoyResultsPath = Path.Combine(datasetPath, string.Format("{0}_decoy.tsv", datasetName));

                var globalWorkflow = new GlobalWorkflow(rawFilePath);
                try
                {
                    // Run the global workflow for targets and decoys.
                    var targetResults = globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError);
                    var decoyResults = globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError);

                    // Write both result sets.
                    LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                    LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
                }
                finally
                {
                    // Close the source data file even when the workflow or the writer throws.
                    globalWorkflow.LcMsRun.Close();
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Writes the target information for <c>LipidTargetList</c> to <paramref name="fileLocation"/>,
        /// routing progress values to <c>ReportGlobalWorkflowProgress</c>, and reports 0 after the writer returns.
        /// </summary>
        /// <param name="fileLocation">Destination passed through to the result writer.</param>
        public void OnWriteTargetInfo(string fileLocation)
        {
            // Typed as IProgress<int> because Report is only reachable through the interface.
            IProgress<int> progressReporter = new Progress<int>(ReportGlobalWorkflowProgress);

            LipidGroupSearchResultWriter.OutputTargetInfo(
                LipidTargetList,
                fileLocation,
                Path.GetFileName(RawFilePath),
                progressReporter);

            // Report zero once the export call has returned.
            progressReporter.Report(0);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes fragment information for every entry of <c>SpectrumSearchResultList</c> flagged
        /// <c>ShouldExport</c> to <paramref name="fileLocation"/>, routing progress values to
        /// <c>ReportFragmentSearchProgress</c>, and reports 0 after the writer returns.
        /// </summary>
        /// <param name="fileLocation">Destination passed through to the result writer.</param>
        public void OnWriteFragmentInfo(string fileLocation)
        {
            // Typed as IProgress<int> because Report is only reachable through the interface.
            IProgress<int> progressReporter = new Progress<int>(ReportFragmentSearchProgress);

            // Snapshot the exportable results before handing them to the writer.
            var exportableResults = SpectrumSearchResultList
                .Where(searchResult => searchResult.ShouldExport)
                .ToList();

            LipidGroupSearchResultWriter.OutputFragmentInfo(
                exportableResults,
                TargetAdduct,
                FragmentSearchList,
                LcMsRun,
                fileLocation,
                Path.GetFileName(RawFilePath),
                progressReporter);

            // Report zero once the export call has returned.
            progressReporter.Report(0);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Writes every entry of <c>LipidGroupSearchResultList</c> flagged <c>ShouldExport</c> to
        /// <paramref name="fileLocation"/>, routing progress values to <c>ReportGlobalWorkflowProgress</c>,
        /// and reports 0 after the writer returns.
        /// </summary>
        /// <param name="fileLocation">Destination passed through to the result writer.</param>
        public void OnExportGlobalResults(string fileLocation)
        {
            // Typed as IProgress<int> because Report is only reachable through the interface.
            IProgress<int> progressReporter = new Progress<int>(ReportGlobalWorkflowProgress);

            // Snapshot the exportable results before handing them to the writer.
            var exportableResults = LipidGroupSearchResultList
                .Where(groupResult => groupResult.ShouldExport)
                .ToList();

            LipidGroupSearchResultWriter.OutputResults(
                exportableResults,
                fileLocation,
                Path.GetFileName(RawFilePath),
                progressReporter);

            // Report zero once the export call has returned.
            progressReporter.Report(0);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Main functionality for running the LIQUID workflow and outputting the results.
        /// For each dataset the raw file is expected at <c>D:\Data\Liquid\Original\{dataset}.raw</c>; when it is
        /// missing it is located through <c>DmsDatasetFinder</c> and copied there first.
        /// </summary>
        /// <param name="targetsFilePath">Path of the lipid target list read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="outputFileName">Output file passed to <c>LipidGroupSearchResultWriter.OutputResults</c> for every dataset.</param>
        /// <param name="datasetNamesList">Dataset names (without the ".raw" extension) to process, in order.</param>
        private void RunWorkflowAndOutput(string targetsFilePath, string outputFileName, List<string> datasetNamesList)
        {
            var targetsFileInfo = new FileInfo(targetsFilePath);
            var lipidReader = new LipidMapsDbReader<Lipid>();
            var lipidList = lipidReader.ReadFile(targetsFileInfo);
            var headerWritten = false;

            foreach (var datasetName in datasetNamesList)
            {
                var rawFileName = datasetName + ".raw";
                var rawFilePath = Path.Combine(@"D:\Data\Liquid\Original", rawFileName);

                Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

                if (File.Exists(rawFilePath))
                {
                    Console.WriteLine(DateTime.Now + ": Dataset already exists");
                }
                else
                {
                    Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                    // Lookup in DMS via Mage
                    var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                    var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
                    var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                    // Copy Locally
                    // TODO: Handle files that are on MyEMSL
                    Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
                    File.Copy(fullPathToDmsFile, rawFilePath);
                    Console.WriteLine(DateTime.Now + ": Copy complete");
                }

                // Setup workflow
                var globalWorkflow = new GlobalWorkflow(rawFilePath);
                try
                {
                    // Run workflow
                    var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                    // The last argument is true only for the first dataset written.
                    // NOTE(review): the two booleans are presumably "append" and "write header" - confirm
                    // against LipidGroupSearchResultWriter.OutputResults.
                    LipidGroupSearchResultWriter.OutputResults(lipidGroupSearchResults, outputFileName, rawFileName, null, true, !headerWritten);
                    headerWritten = true;
                }
                finally
                {
                    // Close the source data file even when the workflow or the writer throws.
                    globalWorkflow.LcMsRun.Close();
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs the global workflow on the positive-mode test dataset and writes scoring output to
        /// "fragmentOutput.csv". For each HCD scan only the highest-scoring result is considered, and it is kept
        /// only when its target is of class PC with exactly two acyl chains having <c>NumCarbons &gt; 0</c> and
        /// exactly two acyl chains of type <c>Standard</c>.
        /// </summary>
        public void TestGlobalWorkflowPositive()
        {
            var rawFileLocation = @"../../../testFiles/Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05.raw";
            var globalWorkflow = new GlobalWorkflow(rawFileLocation);

            try
            {
                var fileLocation = @"../../../testFiles/Global_LipidMaps_Pos.txt";
                var fileInfo = new FileInfo(fileLocation);
                var lipidReader = new LipidMapsDbReader<Lipid>();
                var lipidList = lipidReader.ReadFile(fileInfo);

                var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                // Group results of the same scan together, take the best-scoring result of each group
                // (groups are never empty), and keep it only when it passes the PC / acyl chain filter.
                var filteredLipidGroupSearchResults = lipidGroupSearchResults
                    .GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum)
                    .Select(group => group.OrderByDescending(x => x.SpectrumSearchResult.Score).First())
                    .Where(best => best.LipidTarget.LipidClass == LipidClass.PC &&
                                   best.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                                   best.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                    .ToList();

                // StreamWriter(path) overwrites an existing file, so no explicit delete is needed.
                // The using block closes the writer even when writing fails.
                using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
                {
                    if (filteredLipidGroupSearchResults.Count == 0)
                    {
                        // The header is built from the first result; fail with a clear message instead of an
                        // index-out-of-range error.
                        throw new System.InvalidOperationException("No search results passed the PC filter; nothing to write to fragmentOutput.csv.");
                    }

                    LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                    LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05");
                }
            }
            finally
            {
                // Close the source data file even when the workflow or the writer throws.
                globalWorkflow.LcMsRun.Close();
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs the global workflow on a fixed list of positive-mode datasets and appends the scoring output of
        /// all of them to "fragmentOutput.csv". A dataset whose raw file is not in the working directory is
        /// located through <c>DmsDatasetFinder</c> and copied there first. For each HCD scan only the
        /// highest-scoring result is considered, and it is kept only when its target is of class PC with exactly
        /// two acyl chains having <c>NumCarbons &gt; 0</c> and exactly two acyl chains of type <c>Standard</c>.
        /// </summary>
        public void TestCreateScoringOutput()
        {
            const string positiveTargetsFileLocation = @"../../../testFiles/Global_LipidMaps_POS_v3.txt";
            var positiveTargetsFileInfo = new FileInfo(positiveTargetsFileLocation);
            var lipidReader = new LipidMapsDbReader<Lipid>();
            var lipidList = lipidReader.ReadFile(positiveTargetsFileInfo);

            var datasetNames = new List<string>
            {
                // Disabled datasets are kept as commented-out entries.
                //"Dey_lipids_Top_1_1_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Top_1_2_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Top_1_3_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_1_1_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_1_2_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_1_3_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Top_2_1_pos_dil_Gimli_RZ-12-07-05",
                //"Dey_lipids_Top_2_2_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Top_2_3_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_2_2_pos_Gimli_RZ-12-07-05",
                //"Dey_lipids_Bottom_2_3_pos_Gimli_RZ-12-07-05",
                "XGA121_lipid_Calu3_1",
                "XGA121_lipid_Calu3_2",
                "XGA121_lipid_Calu3_3",
                "XGA121_lipid_Skin_1",
                "XGA121_lipid_Skin_2",
                "XGA121_lipid_Skin_3",
                "XGA121_lipid_plasma_1",
                "XGA121_lipid_plasma_2",
                "XGA121_lipid_plasma_3",
                "Vero_01_CM_0d_4_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_4_Lipid_POS_Gimli_15Jan14_13-07-04",
                "Vero_01_MTBE_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
                "LCA_Atta_B_gar2_b_Reruns_31May13_Gimli_12-07-01",
                "LCA_Atta_T_gar1_a1_Reruns_31May13_Gimli_12-07-01",
                "LCA_Atta_M_gar3_a_Reruns_31May13_Gimli_12-07-01",
                "Da_12_1_POS_3K_Gimli_9Oct13_13-07-01",
                "Da_24_1_POS_3K_Gimli_9Oct13_13-07-01",
                //"Lipid_QC_1_14Jan_POS_Gimli_14Jan14_13-07-01",
                //"Lipid_QC_1_14Jan_POS_Gimli_17JAN_13-07-01",
                "Daphnia_gut_TLE_POS_Gimli_21Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_01_POS_Gimli_24Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_02_POS_Gimli_24Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_03_POS_Gimli_24Jan14_13-07-01",
                "Oscar_28days_TLE__POS_04Feb14_13-07-01",
                "Oscar_21days_TLE__POS_04Feb14_13-07-01",
                "Oscar_21days_dark_TLE__POS_04Feb14_13-07-01",
                "Oscar_14day_TLE__POS_04Feb14_13-07-01"
            };

            // StreamWriter(path) overwrites an existing file, so no explicit delete is needed.
            // The using block closes the writer even when a dataset fails.
            using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
            {
                // The header is built from an actual result, so it is written with the first dataset that
                // produces one instead of unconditionally with the first dataset in the list.
                var headerWritten = false;

                foreach (var datasetName in datasetNames)
                {
                    var rawFileName = datasetName + ".raw";

                    Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

                    if (File.Exists(rawFileName))
                    {
                        Console.WriteLine(DateTime.Now + ": Dataset already exists");
                    }
                    else
                    {
                        Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                        // Lookup in DMS via Mage
                        var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                        var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
                        var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                        // Copy Locally
                        // TODO: Handle files that are on MyEMSL
                        Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
                        File.Copy(fullPathToDmsFile, rawFileName);
                        Console.WriteLine(DateTime.Now + ": Copy complete");
                    }

                    // Setup workflow
                    var globalWorkflow = new GlobalWorkflow(rawFileName);
                    try
                    {
                        // Run workflow
                        var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                        // Group results of the same scan together, take the best-scoring result of each group
                        // (groups are never empty), and keep it only when it passes the PC / acyl chain filter.
                        var filteredLipidGroupSearchResults = lipidGroupSearchResults
                            .GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum)
                            .Select(group => group.OrderByDescending(x => x.SpectrumSearchResult.Score).First())
                            .Where(best => best.LipidTarget.LipidClass == LipidClass.PC &&
                                           best.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                                           best.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                            .ToList();

                        // Output results
                        if (!headerWritten && filteredLipidGroupSearchResults.Count > 0)
                        {
                            LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                            headerWritten = true;
                        }
                        LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, datasetName);
                    }
                    finally
                    {
                        // Close the source data file even when the workflow or the writer throws.
                        globalWorkflow.LcMsRun.Close();
                    }
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Runs the global workflow for every dataset named in <paramref name="fileListPath"/> (one name per
        /// line) and writes "{dataset}_target.tsv" and "{dataset}_decoy.tsv" into the directory that contains
        /// the list file. Lines starting with "//" and blank lines are skipped. Datasets whose name contains
        /// "pos" (case-insensitive) are searched against the positive lists; all others against the negative
        /// lists. A dataset that fails is reported on the console and its name is appended to
        /// "failedDatasets.txt" in the same directory; processing then continues with the next dataset.
        /// </summary>
        /// <param name="fileListPath">Text file listing the dataset names; its directory is also the output directory.</param>
        /// <param name="posTargetFilePath">Target lipid list used for positive datasets.</param>
        /// <param name="posDecoyFilePath">Decoy lipid list used for positive datasets.</param>
        /// <param name="negTargetFilePath">Target lipid list used for all other datasets.</param>
        /// <param name="negDecoyFilePath">Decoy lipid list used for all other datasets.</param>
        /// <param name="hcdError">HCD error tolerance forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <param name="cidError">CID error tolerance forwarded to <c>RunGlobalWorkflow</c>.</param>
        public void RunTrainingOnFileList(
            string fileListPath,
            string posTargetFilePath,
            string posDecoyFilePath,
            string negTargetFilePath,
            string negDecoyFilePath,
            double hcdError = 30,
            double cidError = 500)
        {
            // Read positive target file
            var posTargetReader = new LipidMapsDbReader<Lipid>();
            var posTargets = posTargetReader.ReadFile(new FileInfo(posTargetFilePath));

            // Read positive decoy file
            var posDecoyReader = new LipidMapsDbReader<Lipid>();
            var posDecoys = posDecoyReader.ReadFile(new FileInfo(posDecoyFilePath));

            // Read negative target file
            var negTargetReader = new LipidMapsDbReader<Lipid>();
            var negTargets = negTargetReader.ReadFile(new FileInfo(negTargetFilePath));

            // Read negative decoy file
            var negDecoyReader = new LipidMapsDbReader<Lipid>();
            var negDecoys = negDecoyReader.ReadFile(new FileInfo(negDecoyFilePath));

            var outputDirectory = Path.GetDirectoryName(fileListPath);
            var errorFile = Path.Combine(outputDirectory, "failedDatasets.txt");

            foreach (var datasetName in File.ReadLines(fileListPath))
            {
                // Skip blank lines (e.g. a trailing newline) and commented-out entries.
                if (string.IsNullOrWhiteSpace(datasetName) || datasetName.StartsWith("//", StringComparison.Ordinal))
                {
                    continue;
                }

                try
                {
                    // create output paths
                    var rawFilePath = GetRawFilePath(outputDirectory, datasetName);
                    var rawFileName = Path.GetFileName(rawFilePath);
                    var targetResultsPath = Path.Combine(outputDirectory, string.Format("{0}_target.tsv", datasetName));
                    var decoyResultsPath = Path.Combine(outputDirectory, string.Format("{0}_decoy.tsv", datasetName));

                    // Select targets and decoys; the comparison is ordinal so it does not depend on the
                    // current culture.
                    var isPositive = datasetName.IndexOf("pos", StringComparison.OrdinalIgnoreCase) >= 0;
                    IEnumerable<Lipid> targets = isPositive ? posTargets : negTargets;
                    IEnumerable<Lipid> decoys = isPositive ? posDecoys : negDecoys;

                    var globalWorkflow = new GlobalWorkflow(rawFilePath);
                    try
                    {
                        // Run liquid global workflow
                        var targetResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError));
                        var decoyResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError));

                        // Output results
                        LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                        LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
                    }
                    finally
                    {
                        // Close the source data file even when this dataset fails, so the handle is not
                        // left open while the remaining datasets are processed.
                        globalWorkflow.LcMsRun.Close();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure and continue, but keep the reason visible.
                    Console.WriteLine("ERROR: Could not process dataset {0}.", datasetName);
                    Console.WriteLine("  Reason: {0}", ex.Message);
                    using (var streamWriter = new StreamWriter(errorFile, true))
                    {
                        streamWriter.WriteLine(datasetName);
                    }
                }
            }
        }