/// <summary>
/// Prints, for one scan, the theoretical isotopomer envelope and the observed isotope peak
/// intensities of the "minus one" composition, i.e. the lipid target's composition with one
/// hydrogen removed. These are the two value series used to calculate the Pearson correlation.
/// </summary>
/// <param name="precursor">Scan number passed to the run's GetSpectrum.</param>
/// <param name="adduct">Value assigned to <c>Lipid.AdductFull</c>.</param>
/// <param name="commonName">Value assigned to <c>Lipid.CommonName</c>.</param>
/// <param name="id">Identifier that is only used in the "observed peaks is null" message.</param>
/// <param name="rawFilePath">Path handed to <c>PbfLcMsRun.GetLcMsRun</c>.</param>
public void TestFitMinusOneScore(int precursor, string adduct, string commonName, string id, string rawFilePath)
{
    var lipid = new Lipid() { AdductFull = adduct, CommonName = commonName };
    var lipidTarget = lipid.CreateLipidTarget();
    var composition = lipidTarget.Composition;

    // Subtract one hydrogen to make this a minus1 fit score
    var compMinus1 = new Composition(
        composition.C,
        composition.H - 1,
        composition.N,
        composition.O,
        composition.S,
        composition.P);

    var lcmsRun = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var spectrum = lcmsRun.GetSpectrum(precursor);
    if (spectrum == null)
    {
        // Same guard and message the file-based scoring methods use for an unknown scan.
        Console.WriteLine("Invalid scan number: {0}", precursor);
        return;
    }

    var relativeIntensityThreshold = 0.1;
    var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

    // Get the values to use to calculate pearson correlation
    var observedPeaks = LipidUtil.GetAllIsotopePeaks(spectrum, compMinus1, tolerance, relativeIntensityThreshold);
    if (observedPeaks == null)
    {
        // Nothing to report; stop here instead of dereferencing the null array below.
        Console.WriteLine("Observed peaks is null for scan " + id);
        return;
    }

    var isotopomerEnvelope = IsoProfilePredictor.GetIsotopomerEnvelop(
        compMinus1.C,
        compMinus1.H,
        compMinus1.N,
        compMinus1.O,
        compMinus1.S);

    var observedIntensities = new double[observedPeaks.Length];
    for (var i = 0; i < observedPeaks.Length; i++)
    {
        var observedPeak = observedPeaks[i];

        // A missing peak counts as zero intensity. The float cast is kept from the original
        // so the printed values do not change.
        observedIntensities[i] = observedPeak != null ? (float)observedPeak.Intensity : 0.0;
    }

    Console.WriteLine("The theoretical y values are: ");
    foreach (var value in isotopomerEnvelope.Envolope)
    {
        Console.WriteLine(value + ", ");
    }

    Console.WriteLine("The observed peak intensity x values are: ");
    foreach (var value in observedIntensities)
    {
        Console.WriteLine(value + ", ");
    }
}
/// <summary>
/// Reads every ".txt" results file in a directory, scores each row against the spectrum of its
/// precursor scan with the Pearson-correlation and cosine fit calculators, and writes all rows
/// (original columns plus "Raw File" and the four score columns) to "training.tsv" in that
/// directory.
/// </summary>
/// <remarks>
/// Each results file must have a tab-separated header line containing the "Precursor Scan",
/// "Common Name" and "Adduct" columns. Files for which <c>GetRawFilePath</c> returns null or an
/// empty string are skipped, as are rows whose scan has no spectrum.
/// </remarks>
/// <param name="directoryPath">Directory that is searched for results files and receives the output file.</param>
public void PearsonCorrelationFileCombiner(string directoryPath)
{
    var dirFiles = Directory.GetFiles(directoryPath);

    var correlationCalculator = new PearsonCorrelationFitUtil();
    var cosineCalculator = new CosineFitUtil();

    // Each dictionary corresponds to a dataset, each dictionary key corresponds to the TSV header.
    var results = new List<Dictionary<string, List<string>>>();
    var headers = new HashSet<string>();

    foreach (var pathToResults in dirFiles.Where(path => path.EndsWith(".txt")))
    {
        var datasetName = Path.GetFileNameWithoutExtension(pathToResults);
        var pathToRaw = GetRawFilePath(directoryPath, datasetName);
        var rawName = Path.GetFileName(pathToRaw);

        if (string.IsNullOrEmpty(pathToRaw))
        {
            continue;
        }

        var lcmsRun = PbfLcMsRun.GetLcMsRun(pathToRaw);
        var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

        using (var reader = new StreamReader(new FileStream(pathToResults, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
        {
            // Results for the current dataset.
            var datasetResults = new Dictionary<string, List<string>>();
            results.Add(datasetResults);

            var lineCount = 0;
            var headerToIndex = new Dictionary<string, int>();

            while (!reader.EndOfStream)
            {
                var line = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var pieces = line.Split('\t').ToArray();

                if (lineCount++ == 0)
                {
                    // First line: register the file's own columns, then the columns added here.
                    for (var i = 0; i < pieces.Length; i++)
                    {
                        var header = pieces[i];
                        headerToIndex.Add(header, i);
                        datasetResults.Add(header, new List<string>());
                    }

                    datasetResults.Add("Raw File", new List<string>());
                    datasetResults.Add("Pearson Corr Score", new List<string>());
                    datasetResults.Add("Pearson Corr M-1 Score", new List<string>());
                    datasetResults.Add("Cosine Score", new List<string>());
                    datasetResults.Add("Cosine M-1 Score", new List<string>());

                    headers.UnionWith(datasetResults.Keys);
                    continue;
                }

                var precursor = Convert.ToInt32(pieces[headerToIndex["Precursor Scan"]]);
                var commonName = pieces[headerToIndex["Common Name"]];
                var adduct = pieces[headerToIndex["Adduct"]];

                var spectrum = lcmsRun.GetSpectrum(precursor);
                if (spectrum == null)
                {
                    // Nothing has been appended for this row yet, so all columns stay the same length.
                    Console.WriteLine("Invalid scan number: {0}", precursor);
                    continue;
                }

                var lipid = new Lipid { AdductFull = adduct, CommonName = commonName };
                var lipidTarget = lipid.CreateLipidTarget();
                var spectrumSearchResult = new SpectrumSearchResult(null, null, spectrum, null, null, new Xic(), lcmsRun)
                {
                    PrecursorTolerance = tolerance
                };

                var pearsonCorrScore = correlationCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);
                var pearsonCorrMinus1Score = correlationCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);
                var cosineScore = cosineCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);

                // The M-1 column must use the M-1 scorer; calling GetFitScore here would simply
                // repeat the "Cosine Score" value.
                var cosineMinus1Score = cosineCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);

                // Add results to results dictionary.
                datasetResults["Raw File"].Add(rawName);

                foreach (var header in headerToIndex.Keys)
                {
                    datasetResults[header].Add(pieces[headerToIndex[header]]);
                }

                datasetResults["Pearson Corr Score"].Add(pearsonCorrScore.ToString());
                datasetResults["Pearson Corr M-1 Score"].Add(pearsonCorrMinus1Score.ToString());
                datasetResults["Cosine Score"].Add(cosineScore.ToString());
                datasetResults["Cosine M-1 Score"].Add(cosineMinus1Score.ToString());
            }
        }
    }

    // Write results
    var outputFilePath = Path.Combine(directoryPath, "training.tsv");
    using (var writer = new StreamWriter(outputFilePath))
    {
        // Write headers
        foreach (var header in headers)
        {
            writer.Write("{0}\t", header);
        }

        writer.WriteLine();

        // Write data
        foreach (var datasetResults in results)
        {
            List<string> scoreColumn;
            if (!datasetResults.TryGetValue("Pearson Corr Score", out scoreColumn))
            {
                // No header line was read for this dataset (e.g. an empty file), so it has no rows.
                continue;
            }

            var fileLength = scoreColumn.Count;
            for (var i = 0; i < fileLength; i++)
            {
                foreach (var header in headers)
                {
                    // A dataset that lacks a column present in another dataset gets an empty cell.
                    List<string> column;
                    var value = datasetResults.TryGetValue(header, out column) ? column[i] : string.Empty;
                    writer.Write("{0}\t", value);
                }

                writer.WriteLine();
            }
        }
    }
}
/// <summary>
/// For every ".txt" results file in a directory, writes a "&lt;dataset&gt;_training.tsv" file next to
/// it that repeats each scored row prefixed with the raw file name and followed by the Pearson
/// correlation fit score and M-1 fit score of the row's precursor scan.
/// </summary>
/// <remarks>
/// Each results file must have a tab-separated header line containing the "Precursor Scan",
/// "Common Name" and "Adduct" columns. Files for which <c>GetRawFilePath</c> returns null or an
/// empty string are skipped, as are rows whose scan has no spectrum.
/// </remarks>
/// <param name="directoryPath">Directory that is searched for results files.</param>
public void TestPearsonCorrelationWholeFile(string directoryPath)
{
    var dirFiles = Directory.GetFiles(directoryPath);
    var correlationCalculator = new PearsonCorrelationFitUtil();

    foreach (var pathToResults in dirFiles.Where(path => path.EndsWith(".txt")))
    {
        var datasetName = Path.GetFileNameWithoutExtension(pathToResults);
        var pathToRaw = GetRawFilePath(directoryPath, datasetName);

        if (string.IsNullOrEmpty(pathToRaw))
        {
            continue;
        }

        var lcmsRun = PbfLcMsRun.GetLcMsRun(pathToRaw);
        var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

        var rawFileName = Path.GetFileName(pathToRaw);
        var datasetDirPath = Path.GetDirectoryName(pathToResults);
        var outputFileName = string.Format("{0}_training.tsv", datasetName);
        var outputPath = Path.Combine(datasetDirPath, outputFileName);

        using (var writer = new StreamWriter(outputPath))
        using (var reader = new StreamReader(new FileStream(pathToResults, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
        {
            var lineCount = 0;
            var headerToIndex = new Dictionary<string, int>();

            while (!reader.EndOfStream)
            {
                var line = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var pieces = line.Split('\t').ToArray();

                if (lineCount++ == 0)
                {
                    // First line: "Raw File", the file's own headers, then the two score headers.
                    writer.Write("Raw File\t");
                    for (var i = 0; i < pieces.Length; i++)
                    {
                        headerToIndex.Add(pieces[i], i);
                        writer.Write("{0}\t", pieces[i]);
                    }

                    writer.WriteLine("Fit Score\tFit M-1 Score");
                    continue;
                }

                var precursor = Convert.ToInt32(pieces[headerToIndex["Precursor Scan"]]);
                var commonName = pieces[headerToIndex["Common Name"]];
                var adduct = pieces[headerToIndex["Adduct"]];

                var spectrum = lcmsRun.GetSpectrum(precursor);
                if (spectrum == null)
                {
                    Console.WriteLine("Invalid scan number: {0}", precursor);
                    continue;
                }

                var lipid = new Lipid { AdductFull = adduct, CommonName = commonName };
                var lipidTarget = lipid.CreateLipidTarget();
                var spectrumSearchResult = new SpectrumSearchResult(null, null, spectrum, null, null, new Xic(), lcmsRun)
                {
                    PrecursorTolerance = tolerance
                };

                var fitScore = correlationCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);
                var fitMinus1Score = correlationCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);

                writer.Write(rawFileName + "\t");
                writer.Write(line);

                // The header row puts a tab after every original column, so the copied line
                // needs one too; otherwise its last cell and the fit score share a column.
                writer.WriteLine("\t{0}\t{1}", fitScore, fitMinus1Score);
            }
        }
    }
}
/// <summary>
/// Copies a tab-separated targets file to a new file, filling in the mass and composition
/// columns of every row where either of them is empty. The values come from the lipid target
/// created from the row's common name (column 1) and adduct (column 2).
/// </summary>
/// <remarks>
/// Input and output paths are hard-coded. The first line is copied unchanged as the header.
/// A row that cannot be processed (for example one with fewer columns than the composition
/// column index) is left out of the output; the skipped row is reported on the console.
/// </remarks>
public void FillCompAndMassForTargetsFile()
{
    const string targetsFile = @"E:\Source\Liquid\trunk\LiquidTest\testFiles\Global_LipidMaps_POS_7b_Decoys.txt";
    const string outputfile = @"E:\Source\Liquid\trunk\LiquidTest\testFiles\Global_LipidMaps_POS_7b_Decoys_test.txt";
    const int massCol = 6;
    const int compCol = 7;

    var output = new List<string>();

    using (var targets = new StreamReader(new FileStream(targetsFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
    {
        // ReadLine returns null for an empty file; there is no header to copy in that case.
        var header = targets.ReadLine();
        if (header != null)
        {
            output.Add(header);
        }

        while (!targets.EndOfStream)
        {
            var target = targets.ReadLine();
            if (string.IsNullOrWhiteSpace(target))
            {
                continue;
            }

            var splitTarget = target.Split('\t');

            try
            {
                if (string.IsNullOrEmpty(splitTarget[massCol]) || string.IsNullOrEmpty(splitTarget[compCol]))
                {
                    var lipid = new Lipid { CommonName = splitTarget[1], AdductFull = splitTarget[2] };
                    var newTarget = lipid.CreateLipidTarget();
                    splitTarget[massCol] = newTarget.Composition.Mass.ToString(CultureInfo.InvariantCulture);
                    splitTarget[compCol] = newTarget.Composition.ToPlainString();
                }

                output.Add(string.Join("\t", splitTarget));
            }
            catch (Exception ex)
            {
                // Best effort: keep going with the remaining targets, but do not drop a row silently.
                Console.WriteLine("Skipping target \"{0}\": {1}", target, ex.Message);
            }
        }
    }

    using (var outstream = new StreamWriter(outputfile))
    {
        foreach (var x in output)
        {
            outstream.WriteLine(x);
        }
    }
}