/// <summary>
/// Runs the global workflow against a target list and a decoy list for every ".raw" file found directly in
/// <paramref name="rawDirectoryPath"/>, and writes "{dataset}_target.tsv" and "{dataset}_decoy.tsv" next to
/// each raw file.
/// </summary>
/// <param name="rawDirectoryPath">Directory whose files are listed with <c>Directory.GetFiles</c>; only names ending in ".raw" are processed.</param>
/// <param name="targetFilePath">Path of the target lipid file, read with <c>LipidMapsDbReader</c>.</param>
/// <param name="decoyFilePath">Path of the decoy lipid file, read with <c>LipidMapsDbReader</c>.</param>
/// <param name="hcdError">Value passed as the second argument of <c>RunGlobalWorkflow</c> (default 30).</param>
/// <param name="cidError">Value passed as the third argument of <c>RunGlobalWorkflow</c> (default 500).</param>
public void RunTraining(string rawDirectoryPath, string targetFilePath, string decoyFilePath, double hcdError = 30, double cidError = 500)
{
    // Read target file
    var targetReader = new LipidMapsDbReader<Lipid>();
    var targets = targetReader.ReadFile(new FileInfo(targetFilePath));

    // Read decoy file
    var decoyReader = new LipidMapsDbReader<Lipid>();
    var decoys = decoyReader.ReadFile(new FileInfo(decoyFilePath));

    // Ordinal comparison: a file extension is an identifier, not linguistic text.
    var rawFilePaths = Directory.GetFiles(rawDirectoryPath)
        .Where(file => file.EndsWith(".raw", System.StringComparison.Ordinal));

    foreach (var rawFilePath in rawFilePaths)
    {
        // Create output paths next to the raw file
        var rawFileName = Path.GetFileName(rawFilePath);
        var datasetPath = Path.GetDirectoryName(rawFilePath);
        var datasetName = Path.GetFileNameWithoutExtension(rawFilePath);
        var targetResultsPath = Path.Combine(datasetPath, string.Format("{0}_target.tsv", datasetName));
        var decoyResultsPath = Path.Combine(datasetPath, string.Format("{0}_decoy.tsv", datasetName));

        var globalWorkflow = new GlobalWorkflow(rawFilePath);
        try
        {
            // Run liquid global workflow
            var targetResults = globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError);
            var decoyResults = globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError);

            // Output results
            LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
            LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
        }
        finally
        {
            // Assure that the source data file is closed, even when the workflow or the writer throws
            globalWorkflow.LcMsRun.Close();
        }
    }
}
/// <summary>
/// Passes <c>LipidTargetList</c> to <c>LipidGroupSearchResultWriter.OutputTargetInfo</c> so it is written to
/// <paramref name="fileLocation"/>, then reports a progress value of 0.
/// </summary>
/// <param name="fileLocation">Output location handed to the writer.</param>
public void OnWriteTargetInfo(string fileLocation)
{
    // Progress callbacks are routed to ReportGlobalWorkflowProgress.
    IProgress<int> progressReporter = new Progress<int>(ReportGlobalWorkflowProgress);

    var targetsToWrite = LipidTargetList;
    var rawFileName = Path.GetFileName(RawFilePath);

    LipidGroupSearchResultWriter.OutputTargetInfo(
        targetsToWrite,
        fileLocation,
        rawFileName,
        progressReporter);

    // Report 0 once the writer returns (presumably resets the progress display - confirm in the handler).
    progressReporter.Report(0);
}
/// <summary>
/// Passes the entries of <c>SpectrumSearchResultList</c> whose <c>ShouldExport</c> flag is set to
/// <c>LipidGroupSearchResultWriter.OutputFragmentInfo</c>, then reports a progress value of 0.
/// </summary>
/// <param name="fileLocation">Output location handed to the writer.</param>
public void OnWriteFragmentInfo(string fileLocation)
{
    // Progress callbacks are routed to ReportFragmentSearchProgress.
    IProgress<int> progressReporter = new Progress<int>(ReportFragmentSearchProgress);

    // Snapshot only the results the user marked for export.
    var exportable = (from searchResult in SpectrumSearchResultList
                      where searchResult.ShouldExport
                      select searchResult).ToList();

    LipidGroupSearchResultWriter.OutputFragmentInfo(
        exportable,
        TargetAdduct,
        FragmentSearchList,
        LcMsRun,
        fileLocation,
        Path.GetFileName(RawFilePath),
        progressReporter);

    // Report 0 once the writer returns (presumably resets the progress display - confirm in the handler).
    progressReporter.Report(0);
}
/// <summary>
/// Passes the entries of <c>LipidGroupSearchResultList</c> whose <c>ShouldExport</c> flag is set to
/// <c>LipidGroupSearchResultWriter.OutputResults</c>, then reports a progress value of 0.
/// </summary>
/// <param name="fileLocation">Output location handed to the writer.</param>
public void OnExportGlobalResults(string fileLocation)
{
    // Progress callbacks are routed to ReportGlobalWorkflowProgress.
    IProgress<int> progressReporter = new Progress<int>(ReportGlobalWorkflowProgress);

    // Snapshot only the results the user marked for export.
    var exportable = (from groupResult in LipidGroupSearchResultList
                      where groupResult.ShouldExport
                      select groupResult).ToList();

    LipidGroupSearchResultWriter.OutputResults(
        exportable,
        fileLocation,
        Path.GetFileName(RawFilePath),
        progressReporter);

    // Report 0 once the writer returns (presumably resets the progress display - confirm in the handler).
    progressReporter.Report(0);
}
/// <summary>
/// Main functionality for running the LIQUID workflow and outputting the results.
/// For each dataset the raw file is looked up under the local data directory (and copied there from the
/// location returned by <c>DmsDatasetFinder</c> when missing), the global workflow is run with error values
/// 30 and 500, and the results are passed to <c>LipidGroupSearchResultWriter.OutputResults</c>.
/// </summary>
/// <param name="targetsFilePath">Path of the lipid targets file, read with <c>LipidMapsDbReader</c>.</param>
/// <param name="outputFileName">Output file passed to the results writer for every dataset.</param>
/// <param name="datasetNamesList">Dataset names without extension; ".raw" is appended to form the file name.</param>
private void RunWorkflowAndOutput(string targetsFilePath, string outputFileName, List<string> datasetNamesList)
{
    // Local directory where raw files are expected or copied to.
    const string localRawDirectory = @"D:\Data\Liquid\Original";

    var targetsFileInfo = new FileInfo(targetsFilePath);
    var lipidReader = new LipidMapsDbReader<Lipid>();
    var lipidList = lipidReader.ReadFile(targetsFileInfo);

    var headerWritten = false;

    foreach (var datasetName in datasetNamesList)
    {
        var rawFileName = datasetName + ".raw";
        var rawFilePath = Path.Combine(localRawDirectory, rawFileName);

        Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

        if (File.Exists(rawFilePath))
        {
            Console.WriteLine(DateTime.Now + ": Dataset already exists");
        }
        else
        {
            Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

            // Lookup in DMS via Mage
            var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
            var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
            var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

            // Copy Locally
            // TODO: Handle files that are on MyEMSL
            Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
            File.Copy(fullPathToDmsFile, rawFilePath);
            Console.WriteLine(DateTime.Now + ": Copy complete");
        }

        // Setup workflow
        var globalWorkflow = new GlobalWorkflow(rawFilePath);
        try
        {
            // Run workflow
            var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

            // The last argument is true only for the first dataset written (it appears to control whether
            // the header row is emitted - confirm against LipidGroupSearchResultWriter.OutputResults).
            LipidGroupSearchResultWriter.OutputResults(lipidGroupSearchResults, outputFileName, rawFileName, null, true, !headerWritten);
            headerWritten = true;
        }
        finally
        {
            // Assure that the source data file is closed, even when the workflow or the writer throws
            globalWorkflow.LcMsRun.Close();
        }
    }
}
/// <summary>
/// Runs the global workflow on the "Dey_lipids_Bottom_2_1_pos_dil" raw file with the
/// "Global_LipidMaps_Pos.txt" target list, keeps the top-scoring result of each HCD scan when it passes the
/// PC filter below, and writes the kept results to "fragmentOutput.csv" in scoring format.
/// </summary>
public void TestGlobalWorkflowPositive()
{
    const string outputFileName = "fragmentOutput.csv";

    var rawFileLocation = @"../../../testFiles/Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05.raw";
    var globalWorkflow = new GlobalWorkflow(rawFileLocation);

    try
    {
        var fileLocation = @"../../../testFiles/Global_LipidMaps_Pos.txt";
        var fileInfo = new FileInfo(fileLocation);
        var lipidReader = new LipidMapsDbReader<Lipid>();
        var lipidList = lipidReader.ReadFile(fileInfo);

        var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

        var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

        // Group results of same scan together
        var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

        // Grab the result with the best score of each scan (a group produced by GroupBy is never empty)
        foreach (var group in resultsGroupedByScan)
        {
            var bestResult = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();
            var lipidTarget = bestResult.LipidTarget;

            // Keep only PC lipids with exactly two acyl chains that have carbons and exactly two standard chains
            if (lipidTarget.LipidClass == LipidClass.PC &&
                lipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                lipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
            {
                filteredLipidGroupSearchResults.Add(bestResult);
            }
        }

        // The header is built from the first result, so fail with a clear message instead of an
        // index-out-of-range error when nothing passed the filter.
        if (filteredLipidGroupSearchResults.Count == 0)
        {
            throw new System.InvalidOperationException("No search results passed the PC filter; nothing to write to " + outputFileName + ".");
        }

        // StreamWriter(path) overwrites an existing file, so no explicit delete is needed.
        // The using block closes the writer even when writing throws.
        using (TextWriter textWriter = new StreamWriter(outputFileName))
        {
            LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
            LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, "Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05");
        }
    }
    finally
    {
        // Assure that the source data file is closed
        globalWorkflow.LcMsRun.Close();
    }
}
/// <summary>
/// Builds "fragmentOutput.csv" in scoring format from a fixed list of datasets. For each dataset the raw
/// file is copied into the working directory when missing (from the location returned by
/// <c>DmsDatasetFinder</c>), the global workflow is run with error values 30 and 500, and the top-scoring
/// result of each HCD scan is written when it passes the PC filter below.
/// </summary>
public void TestCreateScoringOutput()
{
    const string positiveTargetsFileLocation = @"../../../testFiles/Global_LipidMaps_POS_v3.txt";
    const string outputFileName = "fragmentOutput.csv";

    var positiveTargetsFileInfo = new FileInfo(positiveTargetsFileLocation);
    var lipidReader = new LipidMapsDbReader<Lipid>();
    var lipidList = lipidReader.ReadFile(positiveTargetsFileInfo);

    var datasetNames = new List<string>
    {
        // Disabled datasets are kept as commented-out entries.
        //"Dey_lipids_Top_1_1_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Top_1_2_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Top_1_3_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_1_1_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_1_2_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_1_3_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Top_2_1_pos_dil_Gimli_RZ-12-07-05",
        //"Dey_lipids_Top_2_2_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Top_2_3_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_2_1_pos_dil_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_2_2_pos_Gimli_RZ-12-07-05",
        //"Dey_lipids_Bottom_2_3_pos_Gimli_RZ-12-07-05",
        "XGA121_lipid_Calu3_1",
        "XGA121_lipid_Calu3_2",
        "XGA121_lipid_Calu3_3",
        "XGA121_lipid_Skin_1",
        "XGA121_lipid_Skin_2",
        "XGA121_lipid_Skin_3",
        "XGA121_lipid_plasma_1",
        "XGA121_lipid_plasma_2",
        "XGA121_lipid_plasma_3",
        "Vero_01_CM_0d_4_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_CM_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_4_Lipid_POS_Gimli_15Jan14_13-07-04",
        "Vero_01_MTBE_0d_3_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_2_Lipid_POS_Gimli_15Jan14_13-07-01",
        "Vero_01_MTBE_0d_1_Lipid_POS_Gimli_15Jan14_13-07-01",
        "LCA_Atta_B_gar2_b_Reruns_31May13_Gimli_12-07-01",
        "LCA_Atta_T_gar1_a1_Reruns_31May13_Gimli_12-07-01",
        "LCA_Atta_M_gar3_a_Reruns_31May13_Gimli_12-07-01",
        "Da_12_1_POS_3K_Gimli_9Oct13_13-07-01",
        "Da_24_1_POS_3K_Gimli_9Oct13_13-07-01",
        //"Lipid_QC_1_14Jan_POS_Gimli_14Jan14_13-07-01",
        //"Lipid_QC_1_14Jan_POS_Gimli_17JAN_13-07-01",
        "Daphnia_gut_TLE_POS_Gimli_21Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_01_POS_Gimli_24Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_02_POS_Gimli_24Jan14_13-07-01",
        "OMICS_HH_CDT_Lip_108_03_POS_Gimli_24Jan14_13-07-01",
        "Oscar_28days_TLE__POS_04Feb14_13-07-01",
        "Oscar_21days_TLE__POS_04Feb14_13-07-01",
        "Oscar_21days_dark_TLE__POS_04Feb14_13-07-01",
        "Oscar_14day_TLE__POS_04Feb14_13-07-01"
    };

    // StreamWriter(path) overwrites an existing file, so no explicit delete is needed.
    // The using block closes the writer even when a dataset fails part-way through.
    using (TextWriter textWriter = new StreamWriter(outputFileName))
    {
        // The header is built from the first available result, so it is written for the first dataset
        // that actually has results rather than unconditionally for the first dataset in the list.
        var headerWritten = false;

        foreach (var datasetName in datasetNames)
        {
            var rawFileName = datasetName + ".raw";

            Console.WriteLine(DateTime.Now + ": Processing " + datasetName);

            if (File.Exists(rawFileName))
            {
                Console.WriteLine(DateTime.Now + ": Dataset already exists");
            }
            else
            {
                Console.WriteLine(DateTime.Now + ": Dataset does not exist locally, so we will go get it");

                // Lookup in DMS via Mage
                var dmsFolder = DmsDatasetFinder.FindLocationOfDataset(datasetName);
                var dmsDirectoryInfo = new DirectoryInfo(dmsFolder);
                var fullPathToDmsFile = Path.Combine(dmsDirectoryInfo.FullName, rawFileName);

                // Copy Locally
                // TODO: Handle files that are on MyEMSL
                Console.WriteLine(DateTime.Now + ": Copying dataset from " + dmsDirectoryInfo.FullName);
                File.Copy(fullPathToDmsFile, rawFileName);
                Console.WriteLine(DateTime.Now + ": Copy complete");
            }

            // Setup workflow
            var globalWorkflow = new GlobalWorkflow(rawFileName);
            try
            {
                // Run workflow
                var lipidGroupSearchResults = globalWorkflow.RunGlobalWorkflow(lipidList, 30, 500);

                var filteredLipidGroupSearchResults = new List<LipidGroupSearchResult>();

                // Group results of same scan together
                var resultsGroupedByScan = lipidGroupSearchResults.GroupBy(x => x.SpectrumSearchResult.HcdSpectrum.ScanNum);

                // Grab the result with the best score of each scan (a group produced by GroupBy is never empty)
                foreach (var group in resultsGroupedByScan)
                {
                    var bestResult = group.OrderByDescending(x => x.SpectrumSearchResult.Score).First();
                    var lipidTarget = bestResult.LipidTarget;

                    // Keep only PC lipids with exactly two acyl chains that have carbons and exactly two standard chains
                    if (lipidTarget.LipidClass == LipidClass.PC &&
                        lipidTarget.AcylChainList.Count(x => x.NumCarbons > 0) == 2 &&
                        lipidTarget.AcylChainList.Count(x => x.AcylChainType == AcylChainType.Standard) == 2)
                    {
                        filteredLipidGroupSearchResults.Add(bestResult);
                    }
                }

                // Output results
                if (!headerWritten && filteredLipidGroupSearchResults.Count > 0)
                {
                    LipidGroupSearchResultWriter.AddHeaderForScoring(filteredLipidGroupSearchResults[0], textWriter);
                    headerWritten = true;
                }

                LipidGroupSearchResultWriter.WriteToCsvForScoring(filteredLipidGroupSearchResults, textWriter, datasetName);
            }
            finally
            {
                // Assure that the source data file is closed
                globalWorkflow.LcMsRun.Close();
            }
        }
    }
}
/// <summary>
/// Runs the global workflow against target and decoy lists for every dataset named in a list file and writes
/// "{dataset}_target.tsv" and "{dataset}_decoy.tsv" into the directory that contains the list file.
/// Datasets whose name contains "pos" (case-insensitive) use the positive lists; all others use the negative
/// lists. A dataset that fails is reported on the console and its name is appended to "failedDatasets.txt"
/// in the same directory; processing then continues with the next dataset.
/// </summary>
/// <param name="fileListPath">Text file with one dataset name per line; blank lines and lines starting with "//" are skipped.</param>
/// <param name="posTargetFilePath">Path of the positive target lipid file.</param>
/// <param name="posDecoyFilePath">Path of the positive decoy lipid file.</param>
/// <param name="negTargetFilePath">Path of the negative target lipid file.</param>
/// <param name="negDecoyFilePath">Path of the negative decoy lipid file.</param>
/// <param name="hcdError">Value passed as the second argument of <c>RunGlobalWorkflow</c> (default 30).</param>
/// <param name="cidError">Value passed as the third argument of <c>RunGlobalWorkflow</c> (default 500).</param>
public void RunTrainingOnFileList(
    string fileListPath,
    string posTargetFilePath,
    string posDecoyFilePath,
    string negTargetFilePath,
    string negDecoyFilePath,
    double hcdError = 30,
    double cidError = 500)
{
    // Read positive target file
    var posTargetReader = new LipidMapsDbReader<Lipid>();
    var posTargets = posTargetReader.ReadFile(new FileInfo(posTargetFilePath));

    // Read positive decoy file
    var posDecoyReader = new LipidMapsDbReader<Lipid>();
    var posDecoys = posDecoyReader.ReadFile(new FileInfo(posDecoyFilePath));

    // Read negative target file
    var negTargetReader = new LipidMapsDbReader<Lipid>();
    var negTargets = negTargetReader.ReadFile(new FileInfo(negTargetFilePath));

    // Read negative decoy file
    var negDecoyReader = new LipidMapsDbReader<Lipid>();
    var negDecoys = negDecoyReader.ReadFile(new FileInfo(negDecoyFilePath));

    var outputDirectory = Path.GetDirectoryName(fileListPath);
    var errorFile = Path.Combine(outputDirectory, "failedDatasets.txt");

    foreach (var datasetName in File.ReadLines(fileListPath))
    {
        // Skip blank lines (e.g. a trailing newline) and commented-out entries
        if (string.IsNullOrWhiteSpace(datasetName) || datasetName.StartsWith("//", StringComparison.Ordinal))
        {
            continue;
        }

        try
        {
            // Create output paths
            var rawFilePath = GetRawFilePath(outputDirectory, datasetName);
            var rawFileName = Path.GetFileName(rawFilePath);
            var targetResultsPath = Path.Combine(outputDirectory, string.Format("{0}_target.tsv", datasetName));
            var decoyResultsPath = Path.Combine(outputDirectory, string.Format("{0}_decoy.tsv", datasetName));

            // Select targets and decoys from the dataset name
            var isPositive = datasetName.IndexOf("pos", StringComparison.OrdinalIgnoreCase) >= 0;
            IEnumerable<Lipid> targets = isPositive ? posTargets : negTargets;
            IEnumerable<Lipid> decoys = isPositive ? posDecoys : negDecoys;

            var globalWorkflow = new GlobalWorkflow(rawFilePath);
            try
            {
                // Run liquid global workflow
                var targetResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(targets, hcdError, cidError));
                var decoyResults = GetBestResultPerSpectrum(globalWorkflow.RunGlobalWorkflow(decoys, hcdError, cidError));

                // Output results
                LipidGroupSearchResultWriter.OutputResults(targetResults, targetResultsPath, rawFileName);
                LipidGroupSearchResultWriter.OutputResults(decoyResults, decoyResultsPath, rawFileName);
            }
            finally
            {
                // Assure that the source data file is closed, even when this dataset fails
                globalWorkflow.LcMsRun.Close();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("ERROR: Could not process dataset {0}.", datasetName);

            // Surface the reason for the failure instead of discarding it
            Console.WriteLine(ex.Message);

            using (var streamWriter = new StreamWriter(errorFile, true))
            {
                streamWriter.WriteLine(datasetName);
            }
        }
    }
}