예제 #1
0
        /// <summary>
        /// Runs the global workflow with the target and the decoy lipid lists against every
        /// file ending in ".raw" in a directory, and writes one target and one decoy results
        /// file per dataset next to the raw file.
        /// </summary>
        /// <param name="rawDirectoryPath">Directory whose files are scanned (non-recursively) for names ending in ".raw".</param>
        /// <param name="targetFilePath">Path of the target lipid list, read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="decoyFilePath">Path of the decoy lipid list, read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="hcdError">HCD error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <param name="cidError">CID error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        public void RunTraining(string rawDirectoryPath, string targetFilePath, string decoyFilePath, double hcdError = 30, double cidError = 500)
        {
            // Read target file
            var targetReader = new LipidMapsDbReader<Lipid>();
            var targets      = targetReader.ReadFile(new FileInfo(targetFilePath));

            // Read decoy file
            var decoyReader = new LipidMapsDbReader<Lipid>();
            var decoys      = decoyReader.ReadFile(new FileInfo(decoyFilePath));

            var files = Directory.GetFiles(rawDirectoryPath);

            foreach (var rawFilePath in files.Where(file => file.EndsWith(".raw")))
            {
                // Create output paths: results are written alongside the raw file
                var rawFileName       = Path.GetFileName(rawFilePath);
                var datasetPath       = Path.GetDirectoryName(rawFilePath);
                var datasetName       = Path.GetFileNameWithoutExtension(rawFilePath);
                var targetResultsPath = Path.Combine(datasetPath, string.Format("{0}_target.tsv", datasetName));
                var decoyResultsPath  = Path.Combine(datasetPath, string.Format("{0}_decoy.tsv", datasetName));

                var globalWorkflow = new GlobalWorkflow(rawFilePath);
                try
                {
                    // Run liquid global workflow
                    var targetResults = globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError);
                    var decoyResults  = globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError);

                    // Output results
                    LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                    LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
                }
                finally
                {
                    // Assure that the source data file is closed, even when the workflow or the writer throws
                    globalWorkflow.LcMsRun.Close();
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Main functionality for running the LIQUID workflow and outputting the results
        /// </summary>
        /// <param name="targetsFilePath">Path of the lipid target list, read with <c>LipidMapsDbReader</c>.</param>
        /// <param name="outputFileName">Output file handed to <c>LipidGroupSearchResultWriter.OutputResults</c> for every dataset.</param>
        /// <param name="datasetNamesList">Dataset names (without the ".raw" extension), processed in list order.</param>
        private void RunWorkflowAndOutput(string targetsFilePath, string outputFileName, List<string> datasetNamesList)
        {
            // Local directory in which raw files are looked up and into which missing ones are copied
            const string localRawDirectory = @"D:\Data\Liquid\Original";

            var targetsFileInfo = new FileInfo(targetsFilePath);
            var lipidReader     = new LipidMapsDbReader<Lipid>();
            var lipidList       = lipidReader.ReadFile(targetsFileInfo);
            var headerWritten   = false;

            foreach (var datasetName in datasetNamesList)
            {
                var rawFileName = datasetName + ".raw";
                var rawFilePath = Path.Combine(localRawDirectory, rawFileName);

                Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

                if (File.Exists(rawFilePath))
                {
                    Console.WriteLine(DateTime.Now + ": Dataset already exists");
                }
                else
                {
                    Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                    // Lookup in DMS via Mage
                    var dmsFolder         = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                    var dmsDirectoryInfo  = new DirectoryInfo(dmsFolder);
                    var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                    // Copy Locally
                    // TODO: Handle files that are on MyEMSL
                    Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);

                    // File.Copy fails when the destination directory is missing; CreateDirectory is a no-op if it exists
                    Directory.CreateDirectory(localRawDirectory);
                    File.Copy(fullPathToDmsFile, rawFilePath);
                    Console.WriteLine(DateTime.Now + ": Copy complete");
                }

                // Setup workflow
                var globalWorkflow = new GlobalWorkflow(rawFilePath);
                try
                {
                    // Run workflow
                    var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                    // The final argument is true only for the first dataset written
                    // (presumably the "write header" flag - confirm against OutputResults)
                    LipidGroupSearchResultWriter.OutputResults(lipidGroupSearchResults, outputFileName, rawFileName, null, true, !headerWritten);
                    headerWritten = true;
                }
                finally
                {
                    // Assure that the source data file is closed, even when the workflow or the writer throws
                    globalWorkflow.LcMsRun.Close();
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Smoke test: runs the global workflow on the negative-mode test dataset with the
        /// negative-mode lipid target list. Makes no assertions; it fails only if an exception is thrown.
        /// </summary>
        public void TestGlobalWorkflowNegative()
        {
            // Read the targets first so that a missing or invalid target file cannot leave the raw file open
            var fileLocation = @"../../../testFiles/Global_LipidMaps_Neg.txt";
            var fileInfo     = new FileInfo(fileLocation);
            var lipidReader  = new LipidMapsDbReader<Lipid>();
            var lipidList    = lipidReader.ReadFile(fileInfo);

            var rawFileLocation = @"../../../testFiles/Dey_Lipids_Top_2_3_rerun_Neg_05Jul13_Gimli_12-07-05.raw";
            var globalWorkflow  = new GlobalWorkflow(rawFileLocation);
            try
            {
                globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);
            }
            finally
            {
                // Assure that the source data file is closed, even when the workflow throws
                globalWorkflow.LcMsRun.Close();
            }
        }
예제 #4
0
        /// <summary>
        /// Runs the global workflow on the positive-mode test dataset, keeps the best-scoring
        /// result of each HCD scan when it is a PC lipid with two standard acyl chains that both
        /// have carbons, and writes those results to "fragmentOutput.csv" in the scoring format.
        /// </summary>
        public void TestGlobalWorkflowPositive()
        {
            // Read the targets first so that a missing or invalid target file cannot leave the raw file open
            var fileLocation = @"../../../testFiles/Global_LipidMaps_Pos.txt";
            var fileInfo     = new FileInfo(fileLocation);
            var lipidReader  = new LipidMapsDbReader<Lipid>();
            var lipidList    = lipidReader.ReadFile(fileInfo);

            var rawFileLocation = @"../../../testFiles/Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05.raw";
            var globalWorkflow  = new GlobalWorkflow(rawFileLocation);
            try
            {
                var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

                // Group results of same scan together
                var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

                // Grab the result with the best score (a GroupBy group always has at least one element)
                foreach (var group in resultsGroupedByScan)
                {
                    var resultToAdd = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();

                    if (resultToAdd.LipidTarget.LipidClass == LipidClass.PC &&
                        resultToAdd.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                        resultToAdd.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                    {
                        filteredLipidGroupSearchResults.Add(resultToAdd);
                    }
                }

                // StreamWriter(path) overwrites an existing file, so no explicit delete is needed;
                // the using statement closes the writer even when writing throws
                using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
                {
                    // Indexing [0] throws (and so fails the test) when no result passed the filter
                    LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                    LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05");
                }
            }
            finally
            {
                // Assure that the source data file is closed, even when the workflow or the writer throws
                globalWorkflow.LcMsRun.Close();
            }
        }
예제 #5
0
        /// <summary>
        /// Runs the global workflow (or its averaged-spectrum variant when <c>AverageSpec</c> is set)
        /// for all targets of the requested fragmentation mode, then fills
        /// <c>LipidGroupSearchResultList</c> with the top-scoring results of every group.
        /// </summary>
        /// <param name="hcdError">HCD error value forwarded to the workflow.</param>
        /// <param name="cidError">CID error value forwarded to the workflow.</param>
        /// <param name="fragmentationMode">Only targets with this fragmentation mode are processed.</param>
        /// <param name="numResultsPerScanToInclude">Maximum number of results kept per group, best score first.</param>
        public void OnProcessAllTarget(double hcdError, double cidError, FragmentationMode fragmentationMode, int numResultsPerScanToInclude)
        {
            IProgress<int> progressReporter = new Progress<int>(ReportGlobalWorkflowProgress);

            // Restrict the search to targets of the requested fragmentation mode
            var matchingTargets = LipidTargetList.Where(target => target.LipidTarget.FragmentationMode == fragmentationMode);

            // Start from an empty result list before running the global analysis
            LipidGroupSearchResultList = new List<LipidGroupSearchResult>();

            List<LipidGroupSearchResult> searchResults;
            IEnumerable<IGrouping<double, LipidGroupSearchResult>> groupedResults;

            if (AverageSpec)
            {
                // Averaged spectra: group by isolation window target m/z (HCD spectrum if present, else CID)
                searchResults  = GlobalWorkflow.RunGlobalWorkflowAvgSpec(matchingTargets, LcMsRun, hcdError, cidError, ScoreModel, progressReporter);
                groupedResults = searchResults.GroupBy(
                    r => r.SpectrumSearchResult.HcdSpectrum?.IsolationWindow.IsolationWindowTargetMz ?? r.SpectrumSearchResult.CidSpectrum.IsolationWindow.IsolationWindowTargetMz);
            }
            else
            {
                // Individual spectra: group by scan number (HCD spectrum if present, else CID)
                searchResults  = GlobalWorkflow.RunGlobalWorkflow(matchingTargets, LcMsRun, hcdError, cidError, ScoreModel, progressReporter);
                groupedResults = searchResults.GroupBy(
                    r => r.SpectrumSearchResult.HcdSpectrum?.ScanNum ?? (double)r.SpectrumSearchResult.CidSpectrum.ScanNum);
            }

            // Keep at most numResultsPerScanToInclude results per group, highest score first
            foreach (var resultGroup in groupedResults)
            {
                var rankedByScore = resultGroup.OrderByDescending(r => r.Score).ToList();

                foreach (var topResult in rankedByScore.Take(numResultsPerScanToInclude))
                {
                    LipidGroupSearchResultList.Add(topResult);
                }
            }

            OnPropertyChanged("LipidGroupSearchResultList");
            progressReporter.Report(0);
        }
예제 #6
0
        /// <summary>
        /// Builds "fragmentOutput.csv" in the scoring format from a fixed list of datasets.
        /// Each dataset's raw file is copied from DMS into the working directory when it is not
        /// already there. For every HCD scan the best-scoring result is kept when it is a PC lipid
        /// with two standard acyl chains that both have carbons.
        /// </summary>
        public void TestCreateScoringOutput()
        {
            const string positiveTargetsFileLocation = @"../../../testFiles/Global_LipidMaps_POS_v3.txt";
            var positiveTargetsFileInfo = new FileInfo(positiveTargetsFileLocation);
            var lipidReader             = new LipidMapsDbReader<Lipid>();
            var lipidList               = lipidReader.ReadFile(positiveTargetsFileInfo);

            var datasetNames = new List<string>
            {
                // Disabled datasets:
                // "Dey_lipids_Top_1_1_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Top_1_2_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Top_1_3_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_1_1_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_1_2_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_1_3_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Top_2_1_pos_dil_Gimli_RZ-12-07-05",
                // "Dey_lipids_Top_2_2_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Top_2_3_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_2_2_pos_Gimli_RZ-12-07-05",
                // "Dey_lipids_Bottom_2_3_pos_Gimli_RZ-12-07-05",
                "XGA121_lipid_Calu3_1",
                "XGA121_lipid_Calu3_2",
                "XGA121_lipid_Calu3_3",
                "XGA121_lipid_Skin_1",
                "XGA121_lipid_Skin_2",
                "XGA121_lipid_Skin_3",
                "XGA121_lipid_plasma_1",
                "XGA121_lipid_plasma_2",
                "XGA121_lipid_plasma_3",
                "Vero_01_CM_0d_4_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_CM_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_4_Lipid_POS_Gimli_15Jan14_13-07-04",
                "Vero_01_MTBE_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
                "Vero_01_MTBE_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
                "LCA_Atta_B_gar2_b_Reruns_31May13_Gimli_12-07-01",
                "LCA_Atta_T_gar1_a1_Reruns_31May13_Gimli_12-07-01",
                "LCA_Atta_M_gar3_a_Reruns_31May13_Gimli_12-07-01",
                "Da_12_1_POS_3K_Gimli_9Oct13_13-07-01",
                "Da_24_1_POS_3K_Gimli_9Oct13_13-07-01",
                // Disabled datasets:
                // "Lipid_QC_1_14Jan_POS_Gimli_14Jan14_13-07-01",
                // "Lipid_QC_1_14Jan_POS_Gimli_17JAN_13-07-01",
                "Daphnia_gut_TLE_POS_Gimli_21Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_01_POS_Gimli_24Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_02_POS_Gimli_24Jan14_13-07-01",
                "OMICS_HH_CDT_Lip_108_03_POS_Gimli_24Jan14_13-07-01",
                "Oscar_28days_TLE__POS_04Feb14_13-07-01",
                "Oscar_21days_TLE__POS_04Feb14_13-07-01",
                "Oscar_21days_dark_TLE__POS_04Feb14_13-07-01",
                "Oscar_14day_TLE__POS_04Feb14_13-07-01"
            };

            // StreamWriter(path) overwrites an existing file, so no explicit delete is needed;
            // the using statement closes the writer even when a dataset fails
            using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
            {
                // The header is built from the first available result, which is not
                // necessarily in the first dataset (its filtered list may be empty)
                var headerWritten = false;

                foreach (var datasetName in datasetNames)
                {
                    var rawFileName = datasetName + ".raw";

                    Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

                    if (File.Exists(rawFileName))
                    {
                        Console.WriteLine(DateTime.Now + ": Dataset already exists");
                    }
                    else
                    {
                        Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                        // Lookup in DMS via Mage
                        var dmsFolder         = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                        var dmsDirectoryInfo  = new DirectoryInfo(dmsFolder);
                        var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                        // Copy Locally
                        // TODO: Handle files that are on MyEMSL
                        Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
                        File.Copy(fullPathToDmsFile, rawFileName);
                        Console.WriteLine(DateTime.Now + ": Copy complete");
                    }

                    // Setup workflow
                    var globalWorkflow = new GlobalWorkflow(rawFileName);
                    try
                    {
                        // Run workflow
                        var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                        var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

                        // Group results of same scan together
                        var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

                        // Grab the result with the best score (a GroupBy group always has at least one element)
                        foreach (var group in resultsGroupedByScan)
                        {
                            var resultToAdd = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();

                            if (resultToAdd.LipidTarget.LipidClass == LipidClass.PC &&
                                resultToAdd.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                                resultToAdd.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                            {
                                filteredLipidGroupSearchResults.Add(resultToAdd);
                            }
                        }

                        // Output results
                        if (!headerWritten && filteredLipidGroupSearchResults.Count > 0)
                        {
                            LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                            headerWritten = true;
                        }
                        LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, datasetName);
                    }
                    finally
                    {
                        // Assure that the source data file is closed, even when the workflow or the writer throws
                        globalWorkflow.LcMsRun.Close();
                    }
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Runs the global workflow with target and decoy lipid lists for every dataset named in a
        /// list file and writes "{dataset}_target.tsv" and "{dataset}_decoy.tsv" into the directory
        /// of the list file. Datasets whose name contains "pos" (case-insensitive) use the positive
        /// lists; all others use the negative lists. A dataset that fails is reported on the console
        /// and appended to "failedDatasets.txt", and processing continues with the next one.
        /// </summary>
        /// <param name="fileListPath">Text file with one dataset name per line; lines starting with "//" and blank lines are skipped.</param>
        /// <param name="posTargetFilePath">Path of the positive-mode target lipid list.</param>
        /// <param name="posDecoyFilePath">Path of the positive-mode decoy lipid list.</param>
        /// <param name="negTargetFilePath">Path of the negative-mode target lipid list.</param>
        /// <param name="negDecoyFilePath">Path of the negative-mode decoy lipid list.</param>
        /// <param name="hcdError">HCD error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <param name="cidError">CID error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        public void RunTrainingOnFileList(
            string fileListPath,
            string posTargetFilePath,
            string posDecoyFilePath,
            string negTargetFilePath,
            string negDecoyFilePath,
            double hcdError = 30,
            double cidError = 500)
        {
            // Read positive target file
            var posTargetReader = new LipidMapsDbReader<Lipid>();
            var posTargets      = posTargetReader.ReadFile(new FileInfo(posTargetFilePath));

            // Read positive decoy file
            var posDecoyReader = new LipidMapsDbReader<Lipid>();
            var posDecoys      = posDecoyReader.ReadFile(new FileInfo(posDecoyFilePath));

            // Read negative target file
            var negTargetReader = new LipidMapsDbReader<Lipid>();
            var negTargets      = negTargetReader.ReadFile(new FileInfo(negTargetFilePath));

            // Read negative decoy file
            var negDecoyReader = new LipidMapsDbReader<Lipid>();
            var negDecoys      = negDecoyReader.ReadFile(new FileInfo(negDecoyFilePath));

            var outputDirectory = Path.GetDirectoryName(fileListPath);
            var errorFile       = Path.Combine(outputDirectory, "failedDatasets.txt");

            foreach (var datasetName in File.ReadLines(fileListPath))
            {
                // Skip blank lines and commented-out datasets
                if (string.IsNullOrWhiteSpace(datasetName) || datasetName.StartsWith("//", StringComparison.Ordinal))
                {
                    continue;
                }

                try
                {
                    // Create output paths
                    var rawFilePath       = GetRawFilePath(outputDirectory, datasetName);
                    var rawFileName       = Path.GetFileName(rawFilePath);
                    var targetResultsPath = Path.Combine(outputDirectory, string.Format("{0}_target.tsv", datasetName));
                    var decoyResultsPath  = Path.Combine(outputDirectory, string.Format("{0}_decoy.tsv", datasetName));

                    // Select targets and decoys by ionization mode, taken from the dataset name
                    var isPositive = datasetName.IndexOf("pos", StringComparison.OrdinalIgnoreCase) >= 0;
                    var targets    = isPositive ? posTargets : negTargets;
                    var decoys     = isPositive ? posDecoys : negDecoys;

                    var globalWorkflow = new GlobalWorkflow(rawFilePath);
                    try
                    {
                        // Run liquid global workflow
                        var targetResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError));
                        var decoyResults  = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError));

                        // Output results
                        LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                        LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
                    }
                    finally
                    {
                        // Assure that the source data file is closed; the loop continues after a
                        // failure, so a handle left open here would accumulate across datasets
                        globalWorkflow.LcMsRun.Close();
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure and move on to the next dataset
                    Console.WriteLine("ERROR: Could not process dataset {0}.", datasetName);
                    Console.WriteLine("       {0}: {1}", ex.GetType().Name, ex.Message);
                    using (var streamWriter = new StreamWriter(errorFile, true))
                    {
                        streamWriter.WriteLine(datasetName);
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Builds a score model from the datasets in <c>DatasetLocations</c>. For every dataset the
        /// global workflow is run against <c>LipidList</c>, the best-scoring result of each HCD scan
        /// is taken, and for each of its CID and HCD fragment results one observation is recorded:
        /// log10(observed intensity) / log10(maximum intensity in the spectrum), or 0 when the
        /// fragment was not observed. The observations are then partitioned into model units.
        /// </summary>
        /// <param name="hcdError">HCD error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <param name="cidError">CID error value forwarded to <c>RunGlobalWorkflow</c>.</param>
        /// <returns>The score model built from the partitioned fragment observations.</returns>
        public ScoreModel CreateScoreModel(double hcdError, double cidError)
        {
            const int numTopHitsToConsider = 1;

            var observationDictionary = new Dictionary<SpecificFragment, List<double>>();

            foreach (var datasetLocation in DatasetLocations)
            {
                // Setup workflow
                var globalWorkflow = new GlobalWorkflow(datasetLocation);
                try
                {
                    // Run workflow with the caller-supplied error values
                    // (previously these parameters were ignored in favor of hard-coded 30 and 500)
                    var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(LipidList, hcdError, cidError);

                    // Group results of same scan together
                    var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

                    // Grab the result(s) with the best score
                    foreach (var group in resultsGroupedByScan)
                    {
                        var groupOrdered = group.OrderByDescending(x => x.SpectrumSearchResult.Score).ToList();

                        for (var i = 0; i < numTopHitsToConsider && i < groupOrdered.Count; i++)
                        {
                            var resultToAdd = groupOrdered[i];

                            var lipidTarget          = resultToAdd.LipidTarget;
                            var lipidClass           = lipidTarget.LipidClass;
                            var lipidType            = lipidTarget.LipidType;
                            var fragmentationMode    = lipidTarget.FragmentationMode;
                            var spectrumSearchResult = resultToAdd.SpectrumSearchResult;
                            var cidResultList        = spectrumSearchResult.CidSearchResultList;
                            var hcdResultList        = spectrumSearchResult.HcdSearchResultList;

                            // Maximum peak intensity per spectrum; falls back to 1 for a spectrum without peaks
                            var cidMaxValue = spectrumSearchResult.CidSpectrum.Peaks.Any() ? spectrumSearchResult.CidSpectrum.Peaks.Max(x => x.Intensity) : 1;
                            var hcdMaxValue = spectrumSearchResult.HcdSpectrum.Peaks.Any() ? spectrumSearchResult.HcdSpectrum.Peaks.Max(x => x.Intensity) : 1;

                            // CID Results
                            foreach (var cidResult in cidResultList)
                            {
                                var    fragment  = cidResult.TheoreticalPeak.Description;
                                double intensity = 0;

                                if (cidResult.ObservedPeak != null)
                                {
                                    intensity = Math.Log10(cidResult.ObservedPeak.Intensity) / Math.Log10(cidMaxValue);
                                }

                                var specificFragment = new SpecificFragment(lipidClass, lipidType, fragment, fragmentationMode, FragmentationType.CID);

                                // Create the observation list on first sight of this fragment, then append
                                List<double> observationList;
                                if (!observationDictionary.TryGetValue(specificFragment, out observationList))
                                {
                                    observationList = new List<double>();
                                    observationDictionary.Add(specificFragment, observationList);
                                }
                                observationList.Add(intensity);
                            }

                            // HCD Results
                            foreach (var hcdResult in hcdResultList)
                            {
                                var    fragment  = hcdResult.TheoreticalPeak.Description;
                                double intensity = 0;

                                if (hcdResult.ObservedPeak != null)
                                {
                                    intensity = Math.Log10(hcdResult.ObservedPeak.Intensity) / Math.Log10(hcdMaxValue);
                                }

                                var specificFragment = new SpecificFragment(lipidClass, lipidType, fragment, fragmentationMode, FragmentationType.HCD);

                                // Create the observation list on first sight of this fragment, then append
                                List<double> observationList;
                                if (!observationDictionary.TryGetValue(specificFragment, out observationList))
                                {
                                    observationList = new List<double>();
                                    observationDictionary.Add(specificFragment, observationList);
                                }
                                observationList.Add(intensity);
                            }
                        }
                    }
                }
                finally
                {
                    // Assure that the source data file is closed, even when the workflow throws
                    globalWorkflow.LcMsRun.Close();
                }
            }

            var liquidScoreModelUnitList = PartitionIntoModelUnits(observationDictionary);

            return new ScoreModel(liquidScoreModelUnitList);
        }