예제 #1
0
        /// <summary>
        /// Creates a search result for one lipid target: stores the inputs, asks
        /// <paramref name="scoreModel"/> for a score, picks the scan number and m/z to display,
        /// and computes the Pearson-correlation and cosine fit scores when a precursor spectrum exists.
        /// </summary>
        /// <param name="lipidTarget">Stored in <c>LipidTarget</c>; its <c>Composition</c> is passed to the fit calculators.</param>
        /// <param name="lipidList">Stored in <c>LipidList</c>.</param>
        /// <param name="spectrumSearchResult">Stored in <c>SpectrumSearchResult</c>; supplies the HCD, CID and precursor spectra read here.</param>
        /// <param name="scoreModel">Model whose <c>ScoreLipid</c> method produces <c>Score</c>.</param>
        public LipidGroupSearchResult(LipidTarget lipidTarget, List<Lipid> lipidList, SpectrumSearchResult spectrumSearchResult, ScoreModel scoreModel)
        {
            LipidTarget = lipidTarget;
            LipidList = lipidList;
            SpectrumSearchResult = spectrumSearchResult;
            ShouldExport = false;

            // The model receives this instance with only the four members above assigned;
            // the display values and fit scores below are set afterwards.
            Score = scoreModel.ScoreLipid(this);

            // Display values come from the HCD spectrum when present, otherwise from the CID spectrum.
            // If neither exists, DisplayScanNum and DisplayMz are left untouched.
            var hcd = spectrumSearchResult.HcdSpectrum;
            if (hcd != null)
            {
                DisplayScanNum = hcd.ScanNum;
                DisplayMz = hcd.IsolationWindow.IsolationWindowTargetMz;
            }
            else
            {
                var cid = spectrumSearchResult.CidSpectrum;
                if (cid != null)
                {
                    DisplayScanNum = cid.ScanNum;
                    DisplayMz = cid.IsolationWindow.IsolationWindowTargetMz;
                }
            }

            // The fit scores are only computed when a precursor spectrum is available.
            if (spectrumSearchResult.PrecursorSpectrum != null)
            {
                var pearsonFit = new PearsonCorrelationFitUtil();
                var composition = lipidTarget.Composition;

                PearsonCorrScore = pearsonFit.GetFitScore(spectrumSearchResult, composition);
                PearsonCorrScoreMinus1 = pearsonFit.GetFitMinus1Score(spectrumSearchResult, composition);

                var cosineFit = new CosineFitUtil();

                CosineScore = cosineFit.GetFitScore(spectrumSearchResult, composition);
                CosineScoreMinus1 = cosineFit.GetFitMinus1Score(spectrumSearchResult, composition);
            }
        }
예제 #2
0
        /// <summary>
        /// Reads every tab-separated ".txt" result file in <paramref name="directoryPath"/>, computes
        /// Pearson-correlation and cosine fit scores (plus their "M-1" variants) for each row, and
        /// writes all rows of all files to "training.tsv" in the same directory.
        /// </summary>
        /// <remarks>
        /// The first non-blank line of each result file is treated as the header row. Every data row
        /// must provide the "Precursor Scan", "Common Name" and "Adduct" columns. Result files for
        /// which GetRawFilePath returns no path are skipped, as are rows whose precursor scan cannot
        /// be found in the run. The output contains the union of all headers seen; a dataset that
        /// lacks a column gets an empty value for it.
        /// </remarks>
        /// <param name="directoryPath">Directory holding the result files; also receives the output file.</param>
        public void PearsonCorrelationFileCombiner(string directoryPath)
        {
            const string RawFileHeader = "Raw File";
            const string PearsonHeader = "Pearson Corr Score";
            const string PearsonMinus1Header = "Pearson Corr M-1 Score";
            const string CosineHeader = "Cosine Score";
            const string CosineMinus1Header = "Cosine M-1 Score";

            var dirFiles = Directory.GetFiles(directoryPath);

            var correlationCalculator = new PearsonCorrelationFitUtil();
            var cosineCalculator = new CosineFitUtil();

            // Each dictionary corresponds to a dataset, each dictionary key corresponds to the TSV header.
            var results = new List<Dictionary<string, List<string>>>();
            var headers = new HashSet<string>();

            // Ordinal comparison: a file extension is not linguistic text.
            foreach (var pathToResults in dirFiles.Where(path => path.EndsWith(".txt", StringComparison.Ordinal)))
            {
                var datasetName = Path.GetFileNameWithoutExtension(pathToResults);
                var pathToRaw = GetRawFilePath(directoryPath, datasetName);
                if (string.IsNullOrEmpty(pathToRaw))
                {
                    // No raw file for this result file; nothing can be re-scored.
                    continue;
                }

                var rawName = Path.GetFileName(pathToRaw);
                var lcmsRun = PbfLcMsRun.GetLcMsRun(pathToRaw);
                var tolerance = new Tolerance(30, ToleranceUnit.Ppm);

                using (var reader = new StreamReader(new FileStream(pathToResults, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
                {
                    // Results for the current dataset.
                    var datasetResults = new Dictionary<string, List<string>>();
                    results.Add(datasetResults);

                    var headerToIndex = new Dictionary<string, int>();
                    var headerRead = false;

                    while (!reader.EndOfStream)
                    {
                        var line = reader.ReadLine();
                        if (string.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }

                        var pieces = line.Split('\t');

                        if (!headerRead)
                        {
                            // First non-blank line: register the file's own columns, then the added ones.
                            headerRead = true;
                            for (var i = 0; i < pieces.Length; i++)
                            {
                                var header = pieces[i];
                                headerToIndex.Add(header, i);
                                datasetResults.Add(header, new List<string>());
                            }

                            datasetResults.Add(RawFileHeader, new List<string>());
                            datasetResults.Add(PearsonHeader, new List<string>());
                            datasetResults.Add(PearsonMinus1Header, new List<string>());
                            datasetResults.Add(CosineHeader, new List<string>());
                            datasetResults.Add(CosineMinus1Header, new List<string>());
                            headers.UnionWith(datasetResults.Keys);
                            continue;
                        }

                        var precursor = Convert.ToInt32(pieces[headerToIndex["Precursor Scan"]]);
                        var commonName = pieces[headerToIndex["Common Name"]];
                        var adduct = pieces[headerToIndex["Adduct"]];
                        var spectrum = lcmsRun.GetSpectrum(precursor);
                        if (spectrum == null)
                        {
                            // The row is dropped entirely, so every column list stays the same length.
                            Console.WriteLine("Invalid scan number: {0}", precursor);
                            continue;
                        }

                        var lipid = new Lipid
                        {
                            AdductFull = adduct,
                            CommonName = commonName
                        };
                        var lipidTarget = lipid.CreateLipidTarget();

                        // Only the spectrum read above, an empty Xic and the run are supplied;
                        // the remaining constructor arguments are null.
                        var spectrumSearchResult = new SpectrumSearchResult(null, null, spectrum, null, null, new Xic(), lcmsRun)
                        {
                            PrecursorTolerance = tolerance
                        };

                        var pearsonCorrScore = correlationCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);
                        var pearsonCorrMinus1Score = correlationCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);
                        var cosineScore = cosineCalculator.GetFitScore(spectrumSearchResult, lipidTarget.Composition);

                        // Must be the M-1 variant; calling GetFitScore here would duplicate the "Cosine Score" column.
                        var cosineMinus1Score = cosineCalculator.GetFitMinus1Score(spectrumSearchResult, lipidTarget.Composition);

                        // Add results to results dictionary.
                        datasetResults[RawFileHeader].Add(rawName);
                        foreach (var entry in headerToIndex)
                        {
                            datasetResults[entry.Key].Add(pieces[entry.Value]);
                        }

                        datasetResults[PearsonHeader].Add(pearsonCorrScore.ToString());
                        datasetResults[PearsonMinus1Header].Add(pearsonCorrMinus1Score.ToString());
                        datasetResults[CosineHeader].Add(cosineScore.ToString());
                        datasetResults[CosineMinus1Header].Add(cosineMinus1Score.ToString());
                    }
                }
            }

            // Write results
            var outputFilePath = Path.Combine(directoryPath, "training.tsv");

            using (var writer = new StreamWriter(outputFilePath))
            {
                // Write headers
                foreach (var header in headers)
                {
                    writer.Write("{0}\t", header);
                }

                writer.WriteLine();

                // Write data
                foreach (var datasetResults in results)
                {
                    // A result file without a header line leaves its dictionary empty; skip it
                    // instead of failing on the missing score column.
                    List<string> scoreColumn;
                    if (!datasetResults.TryGetValue(PearsonHeader, out scoreColumn))
                    {
                        continue;
                    }

                    var rowCount = scoreColumn.Count;
                    for (var i = 0; i < rowCount; i++)
                    {
                        foreach (var header in headers)
                        {
                            List<string> column;
                            var value = datasetResults.TryGetValue(header, out column) ? column[i] : string.Empty;
                            writer.Write("{0}\t", value);
                        }

                        writer.WriteLine();
                    }
                }
            }
        }