/// <summary>
/// Read a tab-delimited dataset info file and build the list of datasets to run on.
/// </summary>
/// <remarks>
/// Every line is split on tab characters. Lines with fewer than three columns are ignored.
/// The columns are stored, in order, as Label, RawFilePath, Ms1FtFilePath and (when a fourth
/// column is present) MsPfIdFilePath; without a fourth column MsPfIdFilePath is an empty string.
/// When the label is null or whitespace it is replaced with "Dataset_N", where N is the
/// 1-based count of lines accepted so far.
/// </remarks>
/// <param name="filePath">Path of the dataset info file to read</param>
/// <returns>One <see cref="DatasetInfo"/> per accepted line, in file order</returns>
public static List<DatasetInfo> ParseDatasetInfoFile(string filePath)
{
    var parsedDatasets = new List<DatasetInfo>();
    var acceptedLineCount = 0;

    foreach (var dataLine in File.ReadLines(filePath))
    {
        var columns = dataLine.Split('\t');

        // Only lines with at least label, raw file and ms1ft file columns are usable
        if (columns.Length >= 3)
        {
            acceptedLineCount++;

            var entry = new DatasetInfo
            {
                Label = columns[0],
                RawFilePath = columns[1],
                Ms1FtFilePath = columns[2],
                MsPfIdFilePath = columns.Length > 3 ? columns[3] : string.Empty
            };

            // Fall back to a generated label when the file did not provide one
            if (string.IsNullOrWhiteSpace(entry.Label))
            {
                entry.Label = "Dataset_" + acceptedLineCount;
            }

            parsedDatasets.Add(entry);
        }
    }

    return parsedDatasets;
}
/// <summary>
/// Program entry point: load the ProMex features of every dataset listed in a dataset info file,
/// tag them with identifications when an ID file is available, align the features across
/// datasets and hand the alignment to OutputCrossTabWithId.
/// </summary>
/// <param name="args">Command line arguments; args[0] is the path of the dataset info file</param>
public static void Main(string[] args)
{
    // Without this guard, args[0] throws IndexOutOfRangeException when no argument is given
    if (args == null || args.Length == 0)
    {
        Console.WriteLine("Error: specify the path to the dataset info file as the first argument");
        return;
    }

    // Parse file
    var inputFilePath = args[0];
    if (!File.Exists(inputFilePath))
    {
        Console.WriteLine("File not found: " + inputFilePath);
        return;
    }

    var datasets = DatasetInfo.ParseDatasetInfoFile(inputFilePath);
    if (datasets.Count == 0)
    {
        Console.WriteLine("No valid data found in the dataset info file");
        return;
    }

    // The crosstab is named after the input file and placed in the same directory.
    // Path.GetDirectoryName can return null, which Path.Combine does not accept.
    var fileName = Path.GetFileNameWithoutExtension(inputFilePath);
    var directory = Path.GetDirectoryName(inputFilePath) ?? string.Empty;
    var outputfilePath = Path.Combine(directory, string.Format("{0}_crosstab.tsv", fileName));

    var nDataset = datasets.Count;
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(100);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    var dataId = 0;
    foreach (var dataset in datasets)
    {
        var run = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId++, dataset.Ms1FtFilePath, run);

        if (File.Exists(dataset.MsPfIdFilePath))
        {
            var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            // Use the protein name as the protein ID for every match
            foreach (var match in prsmList)
            {
                match.ProteinId = match.ProteinName;
            }

            // Tag features by PrSMs: a match is attached when its scan number lies strictly
            // between the feature's min and max scan numbers and the mass difference is
            // below the tolerance computed for the feature mass
            foreach (LcMsFeature feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var match in prsmList)
                {
                    if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(match);
                    }
                }
            }
        }

        // NOTE(review): dataId was already incremented by the LoadProMexResult call above, so
        // AddDataSet receives a value one higher than LoadProMexResult did - confirm this is intended
        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();

    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var i = 0; i < nDataset; i++)
    {
        alignment.FillMissingFeatures(i);
        Console.WriteLine("{0} has been processed", datasets[i].Label);
    }

    OutputCrossTabWithId(outputfilePath, alignment, datasets.Select(ds => ds.Label).ToArray());
}
/// <summary>
/// Program entry point: load the ProMex features of every dataset listed in a dataset info file,
/// tag them with identifications when an ID file is available, align the features across
/// datasets and hand the alignment to OutputCrossTabWithId.
/// </summary>
/// <remarks>
/// Datasets whose instrument file or ProMex results file is missing are skipped. Only the
/// datasets that were actually added to the alignment are used for the per-dataset processing
/// messages and for the labels passed to OutputCrossTabWithId, so labels stay matched to the
/// order in which datasets were added.
/// </remarks>
/// <param name="args">Command line arguments; args[0] is the path of the dataset info file</param>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        ShowSyntax();
        return;
    }

    // Parse file
    var inputFilePath = args[0];

    if (!File.Exists(inputFilePath))
    {
        ConsoleMsgUtils.ShowError("File not found: " + inputFilePath);
        return;
    }

    var datasets = DatasetInfo.ParseDatasetInfoFile(inputFilePath);

    if (datasets.Count == 0)
    {
        ConsoleMsgUtils.ShowError("No valid data found in the dataset info file");
        ShowSyntax();
        return;
    }

    // The crosstab is named after the input file and placed in the same directory (if any)
    var fileName = Path.GetFileNameWithoutExtension(inputFilePath);
    var directory = Path.GetDirectoryName(inputFilePath);
    var crosstabFilename = string.Format("{0}_crosstab.tsv", fileName);

    string outputfilePath;
    if (string.IsNullOrWhiteSpace(directory))
    {
        outputfilePath = crosstabFilename;
    }
    else
    {
        outputfilePath = Path.Combine(directory, crosstabFilename);
    }

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(100);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    // Datasets added to the alignment, in the order they were added, and the labels of those skipped
    var loadedDatasets = new System.Collections.Generic.List<DatasetInfo>();
    var skippedLabels = new System.Collections.Generic.List<string>();

    var dataId = 0;
    foreach (var dataset in datasets)
    {
        if (!File.Exists(dataset.RawFilePath))
        {
            ConsoleMsgUtils.ShowError("Instrument file not found: " + dataset.RawFilePath);
            skippedLabels.Add(dataset.Label);
            continue;
        }

        if (!File.Exists(dataset.Ms1FtFilePath))
        {
            ConsoleMsgUtils.ShowError("ProMex results file not found: " + dataset.Ms1FtFilePath);
            skippedLabels.Add(dataset.Label);
            continue;
        }

        Console.WriteLine("Opening " + dataset.RawFilePath);
        var run = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);

        Console.WriteLine("Opening " + dataset.Ms1FtFilePath);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId++, dataset.Ms1FtFilePath, run);

        if (!string.IsNullOrWhiteSpace(dataset.MsPfIdFilePath) && File.Exists(dataset.MsPfIdFilePath))
        {
            Console.WriteLine("Opening " + dataset.MsPfIdFilePath);
            var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            // Use the protein name as the protein ID for every match
            foreach (var match in prsmList)
            {
                match.ProteinId = match.ProteinName;
            }

            // Tag features by PrSMs: a match is attached when its scan number lies strictly
            // between the feature's min and max scan numbers and the mass difference is
            // below the tolerance computed for the feature mass
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var match in prsmList)
                {
                    if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(match);
                    }
                }
            }
        }

        // NOTE(review): dataId was already incremented by the LoadProMexResult call above, so
        // AddDataSet receives a value one higher than LoadProMexResult did - confirm this is intended
        alignment.AddDataSet(dataId, features, run);
        loadedDatasets.Add(dataset);
    }

    alignment.AlignFeatures();

    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    // Labels of the datasets that were filled, in alignment order
    var processedLabels = new System.Collections.Generic.List<string>();

    for (var datasetIndex = 0; datasetIndex < loadedDatasets.Count; datasetIndex++)
    {
        var label = loadedDatasets[datasetIndex].Label;

        // Defensive check kept from the original loop: never index past what the alignment holds
        if (datasetIndex >= alignment.CountDatasets)
        {
            ConsoleMsgUtils.ShowWarning(string.Format("Could not align {0}; features not found", label));
            continue;
        }

        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", label);
        processedLabels.Add(label);
    }

    // Datasets skipped while loading were never part of the alignment
    foreach (var skippedLabel in skippedLabels)
    {
        ConsoleMsgUtils.ShowWarning(string.Format("Could not align {0}; features not found", skippedLabel));
    }

    if (processedLabels.Count > 0)
    {
        OutputCrossTabWithId(outputfilePath, alignment, processedLabels.ToArray());
    }
}