/// <summary>
/// Builds a grouped search result for a lipid target: stores the inputs, scores the
/// result with <paramref name="scoreModel"/>, chooses the scan number and m/z to display,
/// and, when a precursor spectrum is present, computes the Pearson correlation and
/// cosine fit scores (including their "minus 1" variants).
/// </summary>
/// <param name="lipidTarget">Target whose composition is passed to the fit-score calculators.</param>
/// <param name="lipidList">Lipids stored in <c>LipidList</c>.</param>
/// <param name="spectrumSearchResult">Search result supplying the HCD, CID and precursor spectra.</param>
/// <param name="scoreModel">Model used to compute <c>Score</c> for this instance.</param>
public LipidGroupSearchResult(
    LipidTarget lipidTarget,
    List<Lipid> lipidList,
    SpectrumSearchResult spectrumSearchResult,
    ScoreModel scoreModel)
{
    LipidTarget = lipidTarget;
    LipidList = lipidList;
    SpectrumSearchResult = spectrumSearchResult;
    ShouldExport = false;

    // The model is handed this instance before the display and fit-score members
    // below are assigned; the statement order is intentional and must be kept.
    Score = scoreModel.ScoreLipid(this);

    // The HCD spectrum takes priority over the CID spectrum for the displayed values.
    var hcdSpectrum = spectrumSearchResult.HcdSpectrum;
    if (hcdSpectrum != null)
    {
        DisplayScanNum = hcdSpectrum.ScanNum;
        DisplayMz = hcdSpectrum.IsolationWindow.IsolationWindowTargetMz;
    }
    else
    {
        var cidSpectrum = spectrumSearchResult.CidSpectrum;
        if (cidSpectrum != null)
        {
            DisplayScanNum = cidSpectrum.ScanNum;
            DisplayMz = cidSpectrum.IsolationWindow.IsolationWindowTargetMz;
        }
    }

    // Fit scores are only computed when a precursor spectrum is available.
    if (spectrumSearchResult.PrecursorSpectrum != null)
    {
        var targetComposition = lipidTarget.Composition;

        var pearsonFit = new PearsonCorrelationFitUtil();
        PearsonCorrScore = pearsonFit.GetFitScore(spectrumSearchResult, targetComposition);
        PearsonCorrScoreMinus1 = pearsonFit.GetFitMinus1Score(spectrumSearchResult, targetComposition);

        var cosineFit = new CosineFitUtil();
        CosineScore = cosineFit.GetFitScore(spectrumSearchResult, targetComposition);
        CosineScoreMinus1 = cosineFit.GetFitMinus1Score(spectrumSearchResult, targetComposition);
    }
}
/// <summary>
/// Reads every tab-separated result file (<c>*.txt</c>) in <paramref name="directoryPath"/>,
/// computes Pearson correlation and cosine fit scores (plus their "M-1" variants) for each
/// row from the row's precursor scan, and writes all rows from all files to
/// <c>training.tsv</c> in the same directory.
/// </summary>
/// <remarks>
/// Each result file must have a header line containing at least the columns
/// "Precursor Scan", "Common Name" and "Adduct". Result files for which
/// <c>GetRawFilePath</c> returns a null or empty path are skipped, as are rows whose
/// precursor scan cannot be loaded from the raw file. The output header is the union of
/// the headers of all processed files; a column missing from a given file is written
/// as an empty value.
/// </remarks>
/// <param name="directoryPath">Directory holding the result files; the output file is written here as well.</param>
public void PearsonCorrelationFileCombiner(string directoryPath)
{
    const string rawFileHeader = "Raw File";
    const string pearsonHeader = "Pearson Corr Score";
    const string pearsonMinus1Header = "Pearson Corr M-1 Score";
    const string cosineHeader = "Cosine Score";
    const string cosineMinus1Header = "Cosine M-1 Score";

    var dirFiles = Directory.GetFiles(directoryPath);
    var correlationCalculator = new PearsonCorrelationFitUtil();
    var cosineCalculator = new CosineFitUtil();

    // Each dictionary corresponds to a dataset, each dictionary key corresponds to the TSV header.
    var results = new List<Dictionary<string, List<string>>>();
    var headers = new HashSet<string>();

    foreach (var pathToResults in dirFiles.Where(path => path.EndsWith(".txt", StringComparison.Ordinal)))
    {
        var datasetName = Path.GetFileNameWithoutExtension(pathToResults);
        var pathToRaw = GetRawFilePath(directoryPath, datasetName);
        if (string.IsNullOrEmpty(pathToRaw))
        {
            continue;
        }

        var rawName = Path.GetFileName(pathToRaw);
        var lcmsRun = PbfLcMsRun.GetLcMsRun(pathToRaw);
        var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

        using (var reader = new StreamReader(new FileStream(pathToResults, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
        {
            // Results for the current dataset.
            var datasetResults = new Dictionary<string, List<string>>();
            results.Add(datasetResults);

            var lineCount = 0;
            var headerToIndex = new Dictionary<string, int>();

            while (!reader.EndOfStream)
            {
                var line = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var pieces = line.Split('\t');

                if (lineCount++ == 0)
                {
                    // First non-blank line: register the file's own columns, then the computed ones.
                    for (var i = 0; i < pieces.Length; i++)
                    {
                        var header = pieces[i];
                        headerToIndex.Add(header, i);
                        datasetResults.Add(header, new List<string>());
                    }

                    datasetResults.Add(rawFileHeader, new List<string>());
                    datasetResults.Add(pearsonHeader, new List<string>());
                    datasetResults.Add(pearsonMinus1Header, new List<string>());
                    datasetResults.Add(cosineHeader, new List<string>());
                    datasetResults.Add(cosineMinus1Header, new List<string>());

                    headers.UnionWith(datasetResults.Keys);
                    continue;
                }

                var precursor = Convert.ToInt32(pieces[headerToIndex["Precursor Scan"]]);
                var commonName = pieces[headerToIndex["Common Name"]];
                var adduct = pieces[headerToIndex["Adduct"]];

                var spectrum = lcmsRun.GetSpectrum(precursor);
                if (spectrum == null)
                {
                    Console.WriteLine("Invalid scan number: {0}", precursor);
                    continue;
                }

                var lipid = new Lipid { AdductFull = adduct, CommonName = commonName };
                var lipidTarget = lipid.CreateLipidTarget();
                var spectrumSearchResult = new SpectrumSearchResult(null, null, spectrum, null, null, new Xic(), lcmsRun)
                {
                    PrecursorTolerance = tolerance
                };

                var pearsonCorrScore = correlationCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);
                var pearsonCorrMinus1Score = correlationCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);
                var cosineScore = cosineCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);
                // The M-1 column must use the M-1 fit; calling GetFitScore here would just
                // duplicate the plain cosine score.
                var cosineMinus1Score = cosineCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);

                // Add results to results dictionary. Every column receives exactly one value
                // per row so that all lists stay index-aligned for the write phase.
                datasetResults[rawFileHeader].Add(rawName);
                foreach (var column in headerToIndex)
                {
                    datasetResults[column.Key].Add(pieces[column.Value]);
                }

                datasetResults[pearsonHeader].Add(pearsonCorrScore.ToString());
                datasetResults[pearsonMinus1Header].Add(pearsonCorrMinus1Score.ToString());
                datasetResults[cosineHeader].Add(cosineScore.ToString());
                datasetResults[cosineMinus1Header].Add(cosineMinus1Score.ToString());
            }
        }
    }

    // Write results
    var outputFilePath = Path.Combine(directoryPath, "training.tsv");
    using (var writer = new StreamWriter(outputFilePath))
    {
        // Write headers (each value is followed by a tab, including the last one).
        foreach (var header in headers)
        {
            writer.Write("{0}\t", header);
        }

        writer.WriteLine();

        // Write data
        foreach (var datasetResults in results)
        {
            // A result file without a header line never registered any columns;
            // it has no rows to write, so skip it instead of failing on the lookup.
            List<string> scoreColumn;
            if (!datasetResults.TryGetValue(pearsonHeader, out scoreColumn))
            {
                continue;
            }

            var fileLength = scoreColumn.Count;
            for (var i = 0; i < fileLength; i++)
            {
                foreach (var header in headers)
                {
                    List<string> columnValues;
                    var value = datasetResults.TryGetValue(header, out columnValues)
                        ? columnValues[i]
                        : string.Empty;
                    writer.Write("{0}\t", value);
                }

                writer.WriteLine();
            }
        }
    }
}