Esempio n. 1
0
        public bool Run_TdMzCal(InputFile raw_file, List <TopDownHit> topdown_hits)
        {
            all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();
            //need to reset m/z in case same td hits used for multiple calibration raw files...
            Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));

            high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
            this.raw_file             = raw_file;

            if (high_scoring_topdown_hits.Count < 5)
            {
                return(false);
            }

            myMsDataFile = Path.GetExtension(raw_file.complete_path) == ".raw" ?
                           ThermoStaticData.LoadAllStaticData(raw_file.complete_path) :
                           null;
            if (myMsDataFile == null)
            {
                myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
            }
            if (myMsDataFile == null)
            {
                return(false);
            }

            DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();

            if (dataPointAcquisitionResult.Ms1List.Count < 10)
            {
                return(false);
            }

            var myMs1DataPoints = new List <(double[] xValues, double yValue)>();

            for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
            {
                //x values
                var explanatoryVariables = new double[4];
                explanatoryVariables[0] = dataPointAcquisitionResult.Ms1List[i].mz;
                explanatoryVariables[1] = dataPointAcquisitionResult.Ms1List[i].retentionTime;
                explanatoryVariables[2] = dataPointAcquisitionResult.Ms1List[i].logTotalIonCurrent;
                explanatoryVariables[3] = dataPointAcquisitionResult.Ms1List[i].logInjectionTime;

                //yvalue
                double mzError = dataPointAcquisitionResult.Ms1List[i].massError;

                myMs1DataPoints.Add((explanatoryVariables, mzError));
            }

            var ms1Model = GetRandomForestModel(myMs1DataPoints);

            CalibrateHitsAndComponents(ms1Model);
            if (Sweet.lollipop.calibrate_raw_files)
            {
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, raw_file.directory + "\\" + raw_file.filename + "_calibrated.mzML", false);
            }
            return(true);
        }
Esempio n. 2
0
        public bool Run_TdMzCal(InputFile raw_file, List <SpectrumMatch> topdown_hits)
        {
            all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();
            //need to reset m/z in case same td hits used for multiple calibration raw files...
            Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));

            high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
            this.raw_file             = raw_file;

            if (high_scoring_topdown_hits.Count < 5)
            {
                return(false);
            }

            myMsDataFile = Path.GetExtension(raw_file.complete_path) == ".raw" ?
                           ThermoStaticData.LoadAllStaticData(raw_file.complete_path) :
                           null;
            if (myMsDataFile == null)
            {
                myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
            }
            if (myMsDataFile == null)
            {
                return(false);
            }

            DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();

            if (dataPointAcquisitionResult.Ms1List.Count < 10)
            {
                return(false);
            }

            if (Sweet.lollipop.mass_calibration)
            {
                var myMs1DataPoints = new List <(double[] xValues, double yValue)>();

                for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
                {
                    //x values
                    var explanatoryVariables = new double[4];
                    explanatoryVariables[0] = dataPointAcquisitionResult.Ms1List[i].mz;
                    explanatoryVariables[1] = dataPointAcquisitionResult.Ms1List[i].retentionTime;
                    explanatoryVariables[2] = dataPointAcquisitionResult.Ms1List[i].logTotalIonCurrent;
                    explanatoryVariables[3] = dataPointAcquisitionResult.Ms1List[i].logInjectionTime;

                    //yvalue
                    double mzError = dataPointAcquisitionResult.Ms1List[i].massError;

                    myMs1DataPoints.Add((explanatoryVariables, mzError));
                }

                var ms1Model = GetRandomForestModel(myMs1DataPoints);

                CalibrateHitsAndComponents(ms1Model);
                if (Sweet.lollipop.calibrate_raw_files)
                {
                    MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile,
                                                                          raw_file.directory + "\\" + raw_file.filename + "_calibrated.mzML", false);
                }
            }

            if (Sweet.lollipop.retention_time_calibration)
            {
                var myMs1DataPoints = new List <(double[] xValues, double yValue)>();
                List <SpectrumMatch> firstElutingTopDownHit = new List <SpectrumMatch>();
                List <string>        PFRs = high_scoring_topdown_hits.Select(h => h.pfr_accession).Distinct().ToList();
                foreach (var PFR in PFRs)
                {
                    var firstHitWithPFR = high_scoring_topdown_hits
                                          .Where(h => h.pfr_accession == PFR).OrderBy(h => h.ms2_retention_time).First();
                    firstElutingTopDownHit.Add(firstHitWithPFR);
                }

                for (int i = 0; i < dataPointAcquisitionResult.Ms1List.Count; i++)
                {
                    if (firstElutingTopDownHit.Contains(dataPointAcquisitionResult.Ms1List[i].identification))
                    {
                        //x values
                        var explanatoryVariables = new double[1];
                        explanatoryVariables[0] = dataPointAcquisitionResult.Ms1List[i].retentionTime;

                        //yvalue
                        double RTError = dataPointAcquisitionResult.Ms1List[i].RTError;

                        myMs1DataPoints.Add((explanatoryVariables, RTError));
                    }
                }

                if (myMs1DataPoints.Count < 10)
                {
                    return(false);
                }

                var ms1Model = GetRandomForestModel(myMs1DataPoints);

                foreach (Component c in Sweet.lollipop.calibration_components.Where(h => h.input_file.lt_condition == raw_file.lt_condition && h.input_file.biological_replicate == raw_file.biological_replicate && h.input_file.fraction == raw_file.fraction && h.input_file.technical_replicate == raw_file.technical_replicate))
                {
                    c.rt_apex = c.rt_apex - ms1Model.Predict(new double[] { c.rt_apex });
                }
            }
            return(true);
        }
Esempio n. 3
0
        private DataPointAquisitionResults GetDataPoints()
        {
            DataPointAquisitionResults res = new DataPointAquisitionResults()
            {
                Ms1List = new List <LabeledMs1DataPoint>()
            };

            // Set of peaks, identified by m/z and retention time. If a peak is in here, it means it has been a part of an accepted identification, and should be rejected
            var peaksAddedFromMS1HashSet = new HashSet <Tuple <double, int> >();

            foreach (SpectrumMatch identification in high_scoring_topdown_hits.OrderByDescending(h => h.score).ThenBy(h => h.pscore).ThenBy(h => h.reported_mass))
            {
                int scanNum = myMsDataFile.GetClosestOneBasedSpectrumNumber(identification.ms2_retention_time);

                List <int> scanNumbers = new List <int>()
                {
                    scanNum
                };
                int proteinCharge = identification.charge;

                Component matching_component = null;
                if (identification.filename != raw_file.filename) //if calibrating across files find component with matching mass and retention time
                {
                    //NOTE: only looking at components from same raw file... looking for components corresponding to td hits from any files w/ same br, fraction, condition however.
                    //look around theoretical mass of topdown hit identified proteoforms - 10 ppm and 5 minutes same br, tr, fraction, condition (same file!)
                    //if neucode labled, look for the light component mass (loaded in...)
                    List <Component> potential_matches = Sweet.lollipop.calibration_components.
                                                         Where(c => c.input_file.lt_condition == raw_file.lt_condition &&
                                                               c.input_file.biological_replicate == raw_file.biological_replicate &&
                                                               c.input_file.fraction == raw_file.fraction &&
                                                               c.input_file.technical_replicate == raw_file.technical_replicate).ToList();
                    if (potential_matches.Count > 0)
                    {
                        matching_component = potential_matches.Where(c =>
                                                                     Math.Abs(c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) - identification.theoretical_mass) * 1e6 / c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) < Sweet.lollipop.cali_mass_tolerance &&
                                                                     Math.Abs(c.rt_apex - identification.ms1_scan.RetentionTime) < Sweet.lollipop.cali_rt_tolerance).OrderBy(c => Math.Abs(c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) - identification.theoretical_mass)).FirstOrDefault();
                    }
                    else
                    {
                        matching_component = null;
                    }

                    if (matching_component == null)
                    {
                        continue;
                    }
                    scanNumbers.Clear();
                    //get scan numbers using retention time (if raw file is spliced, scan numbers change)
                    double rt = matching_component.min_rt;
                    while (Math.Round(rt, 2) <= Math.Round(matching_component.max_rt, 2))
                    {
                        int scanNumber = myMsDataFile.GetClosestOneBasedSpectrumNumber(rt);
                        scanNumbers.Add(scanNumber);
                        rt = myMsDataFile.GetOneBasedScan(scanNumber + 1).RetentionTime;
                    }
                    proteinCharge = matching_component.charge_states.OrderByDescending(c => c.intensity).First().charge_count;
                    if (matching_component.charge_states.Count == 1)
                    {
                        proteinCharge = identification.charge;
                    }
                }


                var formula = identification.GetChemicalFormula();
                if (formula == null)
                {
                    continue;
                }

                // Calculate isotopic distribution of the full peptide
                var dist = IsotopicDistribution.GetDistribution(formula, 0.1, 0.001);

                double[] masses      = dist.Masses.ToArray();
                double[] intensities = dist.Intensities.ToArray();

                Array.Sort(intensities, masses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

                List <int> scansAdded = new List <int>();
                foreach (int scanNumber in scanNumbers)
                {
                    res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, -1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
                    res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, 1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
                }
            }
            return(res);
        }