/// <summary>
/// Runs the global workflow against a target list and a decoy list for every ".raw" file found
/// directly inside <paramref name="rawDirectoryPath"/>, writing "{dataset}_target.tsv" and
/// "{dataset}_decoy.tsv" next to each raw file.
/// </summary>
/// <param name="rawDirectoryPath">Directory that is scanned (non-recursively) for raw files.</param>
/// <param name="targetFilePath">Path of the target lipid file read with <c>LipidMapsDbReader</c>.</param>
/// <param name="decoyFilePath">Path of the decoy lipid file read with <c>LipidMapsDbReader</c>.</param>
/// <param name="hcdError">HCD error value passed through to <c>RunGlobalWorkflow</c>.</param>
/// <param name="cidError">CID error value passed through to <c>RunGlobalWorkflow</c>.</param>
public void RunTraining(string rawDirectoryPath, string targetFilePath, string decoyFilePath, double hcdError = 30, double cidError = 500)
{
    // Read target and decoy files once; they are reused for every dataset.
    var targets = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(targetFilePath));
    var decoys = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(decoyFilePath));

    // Ordinal, case-insensitive match so the result does not depend on the current culture
    // and files named "*.RAW" are picked up as well.
    var rawFilePaths = Directory.GetFiles(rawDirectoryPath)
        .Where(file => file.EndsWith(".raw", System.StringComparison.OrdinalIgnoreCase));

    foreach (var rawFilePath in rawFilePaths)
    {
        // Create output paths next to the raw file.
        var rawFileName = Path.GetFileName(rawFilePath);
        var datasetPath = Path.GetDirectoryName(rawFilePath);
        var datasetName = Path.GetFileNameWithoutExtension(rawFilePath);
        var targetResultsPath = Path.Combine(datasetPath, string.Format("{0}_target.tsv", datasetName));
        var decoyResultsPath = Path.Combine(datasetPath, string.Format("{0}_decoy.tsv", datasetName));

        var globalWorkflow = new GlobalWorkflow(rawFilePath);
        try
        {
            // Run liquid global workflow
            var targetResults = globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError);
            var decoyResults = globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError);

            // Output results
            LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
            LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
        }
        finally
        {
            // Assure that the source data file is closed, even when the workflow or the writer throws.
            globalWorkflow.LcMsRun.Close();
        }
    }
}
/// <summary>
/// Main functionality for running the LIQUID workflow and outputting the results
/// </summary>
/// <param name="targetsFilePath">Path of the lipid target file read with <c>LipidMapsDbReader</c>.</param>
/// <param name="outputFileName">Output file passed to <c>LipidGroupSearchResultWriter.OutputResults</c> for every dataset.</param>
/// <param name="datasetNamesList">Dataset names without the ".raw" extension, processed in list order.</param>
private void RunWorkflowAndOutput(string targetsFilePath, string outputFileName, List<string> datasetNamesList)
{
    // Local cache directory for raw files; datasets missing here are copied from DMS.
    const string localRawDirectory = @"D:\Data\Liquid\Original";

    var targetsFileInfo = new FileInfo(targetsFilePath);
    var lipidReader = new LipidMapsDbReader<Lipid>();
    var lipidList = lipidReader.ReadFile(targetsFileInfo);

    var headerWritten = false;

    foreach (var datasetName in datasetNamesList)
    {
        var rawFileName = datasetName + ".raw";
        var rawFilePath = Path.Combine(localRawDirectory, rawFileName);

        Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

        if (File.Exists(rawFilePath))
        {
            Console.WriteLine(DateTime.Now + ": Dataset already exists");
        }
        else
        {
            Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

            // Lookup in DMS via Mage
            var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
            var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
            var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

            // Copy Locally
            // TODO: Handle files that are on MyEMSL
            Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
            File.Copy(fullPathToDmsFile, rawFilePath);
            Console.WriteLine(DateTime.Now + ": Copy complete");
        }

        // Setup workflow
        var globalWorkflow = new GlobalWorkflow(rawFilePath);
        try
        {
            // Run workflow
            var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

            // The last argument is true only for the first dataset written
            // (presumably the "write header" switch; confirm against LipidGroupSearchResultWriter).
            LipidGroupSearchResultWriter.OutputResults(lipidGroupSearchResults, outputFileName, rawFileName, null, true, !headerWritten);
            headerWritten = true;
        }
        finally
        {
            // Assure that the source data file is closed, even when the workflow or the writer throws.
            globalWorkflow.LcMsRun.Close();
        }
    }
}
// Smoke test: runs the global workflow on a negative-mode test dataset with the negative
// LipidMaps target list. No assertions are made; the test passes when nothing throws.
public void TestGlobalWorkflowNegative()
{
    var rawFileLocation = @"../../../testFiles/Dey_Lipids_Top_2_3_rerun_Neg_05Jul13_Gimli_12-07-05.raw";
    var globalWorkflow = new GlobalWorkflow(rawFileLocation);

    try
    {
        var fileLocation = @"../../../testFiles/Global_LipidMaps_Neg.txt";
        var fileInfo = new FileInfo(fileLocation);
        var lipidReader = new LipidMapsDbReader<Lipid>();
        var lipidList = lipidReader.ReadFile(fileInfo);

        globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);
    }
    finally
    {
        // Assure that the source data file is closed, even when reading targets or the workflow fails.
        globalWorkflow.LcMsRun.Close();
    }
}
// Runs the global workflow on a positive-mode test dataset, keeps the best-scoring hit of each
// HCD scan when it is a PC lipid with two standard, non-empty acyl chains, and writes those hits
// to "fragmentOutput.csv" in the scoring format.
public void TestGlobalWorkflowPositive()
{
    var rawFileLocation = @"../../../testFiles/Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05.raw";
    var globalWorkflow = new GlobalWorkflow(rawFileLocation);

    try
    {
        var fileLocation = @"../../../testFiles/Global_LipidMaps_Pos.txt";
        var fileInfo = new FileInfo(fileLocation);
        var lipidReader = new LipidMapsDbReader<Lipid>();
        var lipidList = lipidReader.ReadFile(fileInfo);

        var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

        var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

        // Group results of same scan together
        var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

        // Grab the result with the best score (groups produced by GroupBy are never empty)
        foreach (var group in resultsGroupedByScan)
        {
            var resultToAdd = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();

            if (resultToAdd.LipidTarget.LipidClass == LipidClass.PC &&
                resultToAdd.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                resultToAdd.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
            {
                filteredLipidGroupSearchResults.Add(resultToAdd);
            }
        }

        // StreamWriter(path) overwrites an existing file, so no explicit delete is required.
        // The using block guarantees the writer is flushed and closed even when writing throws.
        using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
        {
            // The header is derived from the first result; skip it when nothing passed the filter
            // instead of failing with an index-out-of-range error.
            if (filteredLipidGroupSearchResults.Count > 0)
            {
                LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
            }

            LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05");
        }
    }
    finally
    {
        // Assure that the source data file is closed
        globalWorkflow.LcMsRun.Close();
    }
}
/// <summary>
/// Runs the global workflow for every target matching <paramref name="fragmentationMode"/>,
/// keeps the top-scoring results of each group and stores them in
/// <c>LipidGroupSearchResultList</c>.
/// </summary>
/// <param name="hcdError">HCD error value passed through to the workflow.</param>
/// <param name="cidError">CID error value passed through to the workflow.</param>
/// <param name="fragmentationMode">Only targets with this fragmentation mode are searched.</param>
/// <param name="numResultsPerScanToInclude">Maximum number of best-scoring results kept per group.</param>
public void OnProcessAllTarget(double hcdError, double cidError, FragmentationMode fragmentationMode, int numResultsPerScanToInclude)
{
    IProgress<int> progress = new Progress<int>(ReportGlobalWorkflowProgress);

    // Restrict the search to targets of the requested fragmentation mode.
    var matchingTargets = LipidTargetList.Where(target => target.LipidTarget.FragmentationMode == fragmentationMode);

    // Start from an empty result list before the analysis runs.
    LipidGroupSearchResultList = new List<LipidGroupSearchResult>();

    List<LipidGroupSearchResult> searchResults;
    Func<LipidGroupSearchResult, double> groupingKey;

    if (!AverageSpec)
    {
        // Regular mode: results are grouped by scan number (HCD when present, otherwise CID).
        searchResults = GlobalWorkflow.RunGlobalWorkflow(matchingTargets, LcMsRun, hcdError, cidError, ScoreModel, progress);
        groupingKey = x => x.SpectrumSearchResult.HcdSpectrum?.ScanNum ?? (double)x.SpectrumSearchResult.CidSpectrum.ScanNum;
    }
    else
    {
        // Averaged-spectrum mode: results are grouped by isolation window target m/z.
        searchResults = GlobalWorkflow.RunGlobalWorkflowAvgSpec(matchingTargets, LcMsRun, hcdError, cidError, ScoreModel, progress);
        groupingKey = x => x.SpectrumSearchResult.HcdSpectrum?.IsolationWindow.IsolationWindowTargetMz ?? x.SpectrumSearchResult.CidSpectrum.IsolationWindow.IsolationWindowTargetMz;
    }

    IEnumerable<IGrouping<double, LipidGroupSearchResult>> resultsGrouped = searchResults.GroupBy(groupingKey);

    // Keep the best-scoring result(s) of every group.
    foreach (var resultGroup in resultsGrouped)
    {
        var bestFirst = resultGroup.OrderByDescending(x => x.Score);

        foreach (var hit in bestFirst.Take(numResultsPerScanToInclude))
        {
            LipidGroupSearchResultList.Add(hit);
        }
    }

    OnPropertyChanged("LipidGroupSearchResultList");

    // Report 0 to the progress callback once processing is done.
    progress.Report(0);
}
// Runs the global workflow over a fixed list of positive-mode datasets (copying any raw file that
// is not available locally from DMS), keeps the best-scoring hit of each HCD scan when it is a PC
// lipid with two standard, non-empty acyl chains, and appends those hits to "fragmentOutput.csv".
public void TestCreateScoringOutput()
{
    const string positiveTargetsFileLocation = @"../../../testFiles/Global_LipidMaps_POS_v3.txt";
    var positiveTargetsFileInfo = new FileInfo(positiveTargetsFileLocation);
    var lipidReader = new LipidMapsDbReader<Lipid>();
    var lipidList = lipidReader.ReadFile(positiveTargetsFileInfo);

    var datasetNames = new List<string>
    {
        // Currently excluded datasets:
        // "Dey_lipids_Top_1_1_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Top_1_2_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Top_1_3_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_1_1_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_1_2_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_1_3_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Top_2_1_pos_dil_Gimli_RZ-12-07-05",
        // "Dey_lipids_Top_2_2_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Top_2_3_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_2_2_pos_Gimli_RZ-12-07-05",
        // "Dey_lipids_Bottom_2_3_pos_Gimli_RZ-12-07-05",
        "XGA121_lipid_Calu3_1",
        "XGA121_lipid_Calu3_2",
        "XGA121_lipid_Calu3_3",
        "XGA121_lipid_Skin_1",
        "XGA121_lipid_Skin_2",
        "XGA121_lipid_Skin_3",
        "XGA121_lipid_plasma_1",
        "XGA121_lipid_plasma_2",
        "XGA121_lipid_plasma_3",
        "Vero_01_CM_0d_4_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_4_Lipid_POS_Gimli_15Jan14_13-07-04",
        "Vero_01_MTBE_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
        "LCA_Atta_B_gar2_b_Reruns_31May13_Gimli_12-07-01",
        "LCA_Atta_T_gar1_a1_Reruns_31May13_Gimli_12-07-01",
        "LCA_Atta_M_gar3_a_Reruns_31May13_Gimli_12-07-01",
        "Da_12_1_POS_3K_Gimli_9Oct13_13-07-01",
        "Da_24_1_POS_3K_Gimli_9Oct13_13-07-01",
        // "Lipid_QC_1_14Jan_POS_Gimli_14Jan14_13-07-01",
        // "Lipid_QC_1_14Jan_POS_Gimli_17JAN_13-07-01",
        "Daphnia_gut_TLE_POS_Gimli_21Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_01_POS_Gimli_24Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_02_POS_Gimli_24Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_03_POS_Gimli_24Jan14_13-07-01",
        "Oscar_28days_TLE__POS_04Feb14_13-07-01",
        "Oscar_21days_TLE__POS_04Feb14_13-07-01",
        "Oscar_21days_dark_TLE__POS_04Feb14_13-07-01",
        "Oscar_14day_TLE__POS_04Feb14_13-07-01"
    };

    // StreamWriter(path) overwrites an existing file, so no explicit delete is required.
    // The using block guarantees the writer is flushed and closed even when a dataset fails.
    using (TextWriter textWriter = new StreamWriter("fragmentOutput.csv"))
    {
        // The header is derived from a result, so it is written with the first dataset that
        // actually produced one rather than unconditionally with the first dataset in the list.
        var headerWritten = false;

        foreach (var datasetName in datasetNames)
        {
            var rawFileName = datasetName + ".raw";

            Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

            if (File.Exists(rawFileName))
            {
                Console.WriteLine(DateTime.Now + ": Dataset already exists");
            }
            else
            {
                Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                // Lookup in DMS via Mage
                var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
                var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                // Copy Locally
                // TODO: Handle files that are on MyEMSL
                Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
                File.Copy(fullPathToDmsFile, rawFileName);
                Console.WriteLine(DateTime.Now + ": Copy complete");
            }

            // Setup workflow
            var globalWorkflow = new GlobalWorkflow(rawFileName);
            try
            {
                // Run workflow
                var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

                // Group results of same scan together
                var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

                // Grab the result with the best score (groups produced by GroupBy are never empty)
                foreach (var group in resultsGroupedByScan)
                {
                    var resultToAdd = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();

                    if (resultToAdd.LipidTarget.LipidClass == LipidClass.PC &&
                        resultToAdd.LipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                        resultToAdd.LipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                    {
                        filteredLipidGroupSearchResults.Add(resultToAdd);
                    }
                }

                // Output results
                if (!headerWritten && filteredLipidGroupSearchResults.Count > 0)
                {
                    LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                    headerWritten = true;
                }

                LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, datasetName);
            }
            finally
            {
                // Assure that the source data file is closed
                globalWorkflow.LcMsRun.Close();
            }
        }
    }
}
/// <summary>
/// Runs the global workflow against targets and decoys for every dataset named in a list file and
/// writes "{dataset}_target.tsv" and "{dataset}_decoy.tsv" into the directory of the list file.
/// Datasets whose name contains "pos" (case-insensitive) use the positive lists; all others use
/// the negative lists. Datasets that fail are appended to "failedDatasets.txt" in the same
/// directory and processing continues with the next one.
/// </summary>
/// <param name="fileListPath">Text file with one dataset name per line; lines starting with "//" and blank lines are skipped.</param>
/// <param name="posTargetFilePath">Positive-mode target lipid file.</param>
/// <param name="posDecoyFilePath">Positive-mode decoy lipid file.</param>
/// <param name="negTargetFilePath">Negative-mode target lipid file.</param>
/// <param name="negDecoyFilePath">Negative-mode decoy lipid file.</param>
/// <param name="hcdError">HCD error value passed through to <c>RunGlobalWorkflow</c>.</param>
/// <param name="cidError">CID error value passed through to <c>RunGlobalWorkflow</c>.</param>
public void RunTrainingOnFileList(
    string fileListPath,
    string posTargetFilePath,
    string posDecoyFilePath,
    string negTargetFilePath,
    string negDecoyFilePath,
    double hcdError = 30,
    double cidError = 500)
{
    // Read positive target file
    var posTargets = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(posTargetFilePath));

    // Read positive decoy file
    var posDecoys = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(posDecoyFilePath));

    // Read negative target file
    var negTargets = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(negTargetFilePath));

    // Read negative decoy file
    var negDecoys = new LipidMapsDbReader<Lipid>().ReadFile(new FileInfo(negDecoyFilePath));

    var outputDirectory = Path.GetDirectoryName(fileListPath);
    var errorFile = Path.Combine(outputDirectory, "failedDatasets.txt");

    foreach (var datasetName in File.ReadLines(fileListPath))
    {
        // Skip commented-out entries and blank lines; a blank line would otherwise be
        // treated as a dataset, fail, and be logged to the error file.
        if (string.IsNullOrWhiteSpace(datasetName) || datasetName.StartsWith("//", StringComparison.Ordinal))
        {
            continue;
        }

        try
        {
            // Create output paths
            var rawFilePath = GetRawFilePath(outputDirectory, datasetName);
            var rawFileName = Path.GetFileName(rawFilePath);
            var targetResultsPath = Path.Combine(outputDirectory, string.Format("{0}_target.tsv", datasetName));
            var decoyResultsPath = Path.Combine(outputDirectory, string.Format("{0}_decoy.tsv", datasetName));

            // Select targets and decoys by the polarity encoded in the dataset name.
            var isPositive = datasetName.IndexOf("pos", StringComparison.OrdinalIgnoreCase) >= 0;
            IEnumerable<Lipid> targets = isPositive ? posTargets : negTargets;
            IEnumerable<Lipid> decoys = isPositive ? posDecoys : negDecoys;

            var globalWorkflow = new GlobalWorkflow(rawFilePath);
            try
            {
                // Run liquid global workflow
                var targetResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError));
                var decoyResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError));

                // Output results
                LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
            }
            finally
            {
                // Assure that the source data file is closed, also when this dataset fails;
                // otherwise a failed dataset would keep its raw file open for the rest of the run.
                globalWorkflow.LcMsRun.Close();
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the failure (including the reason), record the dataset, continue.
            Console.WriteLine("ERROR: Could not process dataset {0}.", datasetName);
            Console.WriteLine(ex.Message);
            using (var streamWriter = new StreamWriter(errorFile, true))
            {
                streamWriter.WriteLine(datasetName);
            }
        }
    }
}
/// <summary>
/// Builds a score model from the datasets in <c>DatasetLocations</c>: runs the global workflow on
/// each dataset, takes the best-scoring hit of every HCD scan and records, per specific fragment,
/// the observed peak intensity as log10(intensity) / log10(max intensity of the spectrum),
/// or 0 when the theoretical peak was not observed.
/// </summary>
/// <param name="hcdError">HCD error value passed to <c>RunGlobalWorkflow</c>.</param>
/// <param name="cidError">CID error value passed to <c>RunGlobalWorkflow</c>.</param>
/// <returns>The score model created from the partitioned observations.</returns>
public ScoreModel CreateScoreModel(double hcdError, double cidError)
{
    const int numTopHitsToConsider = 1;

    var observationDictionary = new Dictionary<SpecificFragment, List<double>>();

    // Either update the existing observation list of a fragment or create a new one.
    Action<SpecificFragment, double> addObservation = (specificFragment, intensity) =>
    {
        List<double> observationList;
        if (!observationDictionary.TryGetValue(specificFragment, out observationList))
        {
            observationList = new List<double>();
            observationDictionary.Add(specificFragment, observationList);
        }

        observationList.Add(intensity);
    };

    foreach (var datasetLocation in DatasetLocations)
    {
        // Setup workflow
        var globalWorkflow = new GlobalWorkflow(datasetLocation);

        try
        {
            // Run workflow with the caller-supplied error values
            // (previously the parameters were ignored in favour of hard-coded 30 / 500).
            var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(LipidList, hcdError, cidError);

            // Group results of same scan together
            var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

            foreach (var group in resultsGroupedByScan)
            {
                // Grab the result(s) with the best score
                var topHits = group.OrderByDescending(x => x.SpectrumSearchResult.Score).Take(numTopHitsToConsider);

                foreach (var resultToAdd in topHits)
                {
                    var lipidTarget = resultToAdd.LipidTarget;
                    var lipidClass = lipidTarget.LipidClass;
                    var lipidType = lipidTarget.LipidType;
                    var fragmentationMode = lipidTarget.FragmentationMode;

                    var spectrumSearchResult = resultToAdd.SpectrumSearchResult;
                    var cidResultList = spectrumSearchResult.CidSearchResultList;
                    var hcdResultList = spectrumSearchResult.HcdSearchResultList;

                    // Most intense peak of each spectrum; 1 when the spectrum has no peaks.
                    // NOTE(review): a maximum of exactly 1 makes the log10 denominator 0 — confirm this cannot occur with observed peaks.
                    var cidMaxValue = spectrumSearchResult.CidSpectrum.Peaks.Any() ? spectrumSearchResult.CidSpectrum.Peaks.Max(x => x.Intensity) : 1;
                    var hcdMaxValue = spectrumSearchResult.HcdSpectrum.Peaks.Any() ? spectrumSearchResult.HcdSpectrum.Peaks.Max(x => x.Intensity) : 1;

                    // CID Results
                    foreach (var cidResult in cidResultList)
                    {
                        var fragment = cidResult.TheoreticalPeak.Description;

                        double intensity = 0;
                        if (cidResult.ObservedPeak != null)
                        {
                            intensity = Math.Log10(cidResult.ObservedPeak.Intensity) / Math.Log10(cidMaxValue);
                        }

                        addObservation(new SpecificFragment(lipidClass, lipidType, fragment, fragmentationMode, FragmentationType.CID), intensity);
                    }

                    // HCD Results
                    foreach (var hcdResult in hcdResultList)
                    {
                        var fragment = hcdResult.TheoreticalPeak.Description;

                        double intensity = 0;
                        if (hcdResult.ObservedPeak != null)
                        {
                            intensity = Math.Log10(hcdResult.ObservedPeak.Intensity) / Math.Log10(hcdMaxValue);
                        }

                        addObservation(new SpecificFragment(lipidClass, lipidType, fragment, fragmentationMode, FragmentationType.HCD), intensity);
                    }
                }
            }
        }
        finally
        {
            // Assure that the source data file is closed, even when processing this dataset throws.
            globalWorkflow.LcMsRun.Close();
        }
    }

    var liquidScoreModelUnitList = PartitionIntoModelUnits(observationDictionary);
    var liquidScoreModel = new ScoreModel(liquidScoreModelUnitList);

    return liquidScoreModel;
}