/// <summary>
/// Runs m/z calibration of one raw file against the supplied top-down hits.
/// Hits with a positive score are kept and their m/z is reset from the reported mass;
/// hits scoring at least 40 drive the collection of labeled MS1 data points. Those points
/// (m/z, retention time, log total ion current, log injection time -> mass error) are
/// handed to GetRandomForestModel, and the resulting model to CalibrateHitsAndComponents.
/// </summary>
/// <param name="raw_file">Spectra file to calibrate. Loaded through ThermoStaticData when its extension is ".raw" (any casing), otherwise through Mzml.</param>
/// <param name="topdown_hits">Top-down hits to calibrate with; only those with score &gt; 0 are retained.</param>
/// <returns>
/// false when fewer than 5 hits score at least 40, when the spectra file could not be loaded,
/// or when fewer than 10 MS1 data points were collected; true otherwise.
/// </returns>
public bool Run_TdMzCal(InputFile raw_file, List<TopDownHit> topdown_hits)
{
    all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();

    // The same hits can be used to calibrate several raw files, so restore each m/z from the reported mass first.
    Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));
    high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
    this.raw_file = raw_file;
    if (high_scoring_topdown_hits.Count < 5)
    {
        return false;
    }

    // Compare the extension case-insensitively so that e.g. ".RAW" is still routed to the Thermo reader.
    bool isThermoRaw = string.Equals(Path.GetExtension(raw_file.complete_path), ".raw", StringComparison.OrdinalIgnoreCase);
    myMsDataFile = isThermoRaw ? ThermoStaticData.LoadAllStaticData(raw_file.complete_path) : null;
    if (myMsDataFile == null)
    {
        myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
    }
    if (myMsDataFile == null)
    {
        return false;
    }

    DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();
    if (dataPointAcquisitionResult.Ms1List.Count < 10)
    {
        return false;
    }

    var trainingPoints = new List<(double[] xValues, double yValue)>();
    foreach (var point in dataPointAcquisitionResult.Ms1List)
    {
        // Explanatory variables (x values).
        var explanatoryVariables = new double[]
        {
            point.mz,
            point.retentionTime,
            point.logTotalIonCurrent,
            point.logInjectionTime
        };

        // Response variable (y value).
        double mzError = point.massError;
        trainingPoints.Add((explanatoryVariables, mzError));
    }

    var ms1Model = GetRandomForestModel(trainingPoints);
    CalibrateHitsAndComponents(ms1Model);

    if (Sweet.lollipop.calibrate_raw_files)
    {
        // Path.Combine picks the platform's directory separator instead of a hard-coded backslash.
        string calibratedPath = Path.Combine(raw_file.directory, raw_file.filename + "_calibrated.mzML");
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedPath, false);
    }

    return true;
}
/// <summary>
/// Runs calibration of one raw file against the supplied top-down hits.
/// Hits with a positive score are kept and their m/z is reset from the reported mass;
/// hits scoring at least 40 drive the collection of labeled MS1 data points.
/// When Sweet.lollipop.mass_calibration is set, a model of mass error is built from those
/// points and passed to CalibrateHitsAndComponents. When Sweet.lollipop.retention_time_calibration
/// is set, a model of retention-time error is built from the points belonging to the
/// earliest-eluting hit of each pfr_accession and used to shift rt_apex of the calibration
/// components that share this file's condition, biological replicate, fraction and technical replicate.
/// </summary>
/// <param name="raw_file">Spectra file to calibrate. Loaded through ThermoStaticData when its extension is ".raw" (any casing), otherwise through Mzml.</param>
/// <param name="topdown_hits">Top-down hits to calibrate with; only those with score &gt; 0 are retained.</param>
/// <returns>
/// false when fewer than 5 hits score at least 40, when the spectra file could not be loaded,
/// when fewer than 10 MS1 data points were collected, or when retention-time calibration is
/// enabled and fewer than 10 data points qualify for it; true otherwise.
/// </returns>
public bool Run_TdMzCal(InputFile raw_file, List<SpectrumMatch> topdown_hits)
{
    all_topdown_hits = topdown_hits.Where(h => h.score > 0).ToList();

    // The same hits can be used to calibrate several raw files, so restore each m/z from the reported mass first.
    Parallel.ForEach(all_topdown_hits, h => h.mz = h.reported_mass.ToMz(h.charge));
    high_scoring_topdown_hits = all_topdown_hits.Where(h => h.score >= 40).ToList();
    this.raw_file = raw_file;
    if (high_scoring_topdown_hits.Count < 5)
    {
        return false;
    }

    // Compare the extension case-insensitively so that e.g. ".RAW" is still routed to the Thermo reader.
    bool isThermoRaw = string.Equals(Path.GetExtension(raw_file.complete_path), ".raw", StringComparison.OrdinalIgnoreCase);
    myMsDataFile = isThermoRaw ? ThermoStaticData.LoadAllStaticData(raw_file.complete_path) : null;
    if (myMsDataFile == null)
    {
        myMsDataFile = Mzml.LoadAllStaticData(raw_file.complete_path);
    }
    if (myMsDataFile == null)
    {
        return false;
    }

    DataPointAquisitionResults dataPointAcquisitionResult = GetDataPoints();
    if (dataPointAcquisitionResult.Ms1List.Count < 10)
    {
        return false;
    }

    if (Sweet.lollipop.mass_calibration)
    {
        var massTrainingPoints = new List<(double[] xValues, double yValue)>();
        foreach (var point in dataPointAcquisitionResult.Ms1List)
        {
            // Explanatory variables (x values).
            var explanatoryVariables = new double[]
            {
                point.mz,
                point.retentionTime,
                point.logTotalIonCurrent,
                point.logInjectionTime
            };

            // Response variable (y value).
            double mzError = point.massError;
            massTrainingPoints.Add((explanatoryVariables, mzError));
        }

        var ms1Model = GetRandomForestModel(massTrainingPoints);
        CalibrateHitsAndComponents(ms1Model);

        if (Sweet.lollipop.calibrate_raw_files)
        {
            // Path.Combine picks the platform's directory separator instead of a hard-coded backslash.
            string calibratedPath = Path.Combine(raw_file.directory, raw_file.filename + "_calibrated.mzML");
            MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedPath, false);
        }
    }

    if (Sweet.lollipop.retention_time_calibration)
    {
        // For each distinct pfr_accession keep the high-scoring hit with the smallest ms2_retention_time.
        // One grouping pass replaces a filter-and-sort of the whole hit list per accession, and the
        // set gives constant-time membership checks in the loop below.
        var firstElutingTopDownHits = new HashSet<SpectrumMatch>(
            high_scoring_topdown_hits
                .GroupBy(h => h.pfr_accession)
                .Select(g => g.OrderBy(h => h.ms2_retention_time).First()));

        var rtTrainingPoints = new List<(double[] xValues, double yValue)>();
        foreach (var point in dataPointAcquisitionResult.Ms1List)
        {
            if (!firstElutingTopDownHits.Contains(point.identification))
            {
                continue;
            }

            // Explanatory variable (x value).
            var explanatoryVariables = new double[] { point.retentionTime };

            // Response variable (y value).
            double rtError = point.RTError;
            rtTrainingPoints.Add((explanatoryVariables, rtError));
        }

        if (rtTrainingPoints.Count < 10)
        {
            return false;
        }

        var ms1Model = GetRandomForestModel(rtTrainingPoints);

        // Subtract the predicted retention-time error from the apex of every component from this file.
        foreach (Component c in Sweet.lollipop.calibration_components.Where(h =>
                     h.input_file.lt_condition == raw_file.lt_condition
                     && h.input_file.biological_replicate == raw_file.biological_replicate
                     && h.input_file.fraction == raw_file.fraction
                     && h.input_file.technical_replicate == raw_file.technical_replicate))
        {
            c.rt_apex = c.rt_apex - ms1Model.Predict(new double[] { c.rt_apex });
        }
    }

    return true;
}
/// <summary>
/// Collects labeled MS1 data points for the high-scoring top-down hits, processed in order of
/// descending score, then ascending pscore, then ascending reported mass.
/// For a hit whose filename equals the current raw file's, the scan closest to the hit's MS2
/// retention time is searched. For a hit from another file, a calibration component from the
/// current file (same condition, biological replicate, fraction and technical replicate) must
/// match the hit within the configured mass and retention-time tolerances; the scans spanning
/// that component's min_rt..max_rt are searched instead, and hits without a match are skipped.
/// </summary>
/// <returns>Results whose Ms1List holds everything returned by SearchMS1Spectra for the searched scans.</returns>
private DataPointAquisitionResults GetDataPoints()
{
    DataPointAquisitionResults res = new DataPointAquisitionResults()
    {
        Ms1List = new List<LabeledMs1DataPoint>()
    };

    // Set of peaks shared by every SearchMS1Spectra call. Per the original author's note: a peak in here
    // has already been part of an accepted identification and should be rejected.
    var peaksAddedFromMS1HashSet = new HashSet<Tuple<double, int>>();

    foreach (SpectrumMatch identification in high_scoring_topdown_hits
                 .OrderByDescending(h => h.score)
                 .ThenBy(h => h.pscore)
                 .ThenBy(h => h.reported_mass))
    {
        int scanNum = myMsDataFile.GetClosestOneBasedSpectrumNumber(identification.ms2_retention_time);
        List<int> scanNumbers = new List<int>() { scanNum };
        int proteinCharge = identification.charge;

        // Calibrating across files: find a component with matching mass and retention time.
        if (identification.filename != raw_file.filename)
        {
            // Only components from this raw file are considered (same condition, biological replicate,
            // fraction and technical replicate), although the hit itself comes from a different file.
            // Original author's note: for NeuCode-labeled data the light component mass is the one loaded.
            List<Component> potential_matches = Sweet.lollipop.calibration_components
                .Where(c => c.input_file.lt_condition == raw_file.lt_condition
                            && c.input_file.biological_replicate == raw_file.biological_replicate
                            && c.input_file.fraction == raw_file.fraction
                            && c.input_file.technical_replicate == raw_file.technical_replicate)
                .ToList();

            // The mass of each component's most intense charge state is computed once per component here,
            // rather than re-sorting its charge states for every use in the filter and the ordering.
            // A component qualifies when its ppm error against the hit's theoretical mass is below
            // cali_mass_tolerance and its apex is within cali_rt_tolerance of the hit's MS1 scan;
            // the qualifying component with the smallest absolute mass difference wins.
            // FirstOrDefault yields null when nothing qualifies (including an empty candidate list).
            Component matching_component = potential_matches
                .Select(c =>
                {
                    var mostIntense = c.charge_states.OrderByDescending(s => s.intensity).First();
                    return new { component = c, mass = mostIntense.mz_centroid.ToMass(mostIntense.charge_count) };
                })
                .Where(m => Math.Abs(m.mass - identification.theoretical_mass) * 1e6 / m.mass < Sweet.lollipop.cali_mass_tolerance
                            && Math.Abs(m.component.rt_apex - identification.ms1_scan.RetentionTime) < Sweet.lollipop.cali_rt_tolerance)
                .OrderBy(m => Math.Abs(m.mass - identification.theoretical_mass))
                .Select(m => m.component)
                .FirstOrDefault();

            if (matching_component == null)
            {
                continue;
            }

            // Get scan numbers using retention time (if the raw file is spliced, scan numbers change).
            scanNumbers.Clear();
            double rt = matching_component.min_rt;
            while (Math.Round(rt, 2) <= Math.Round(matching_component.max_rt, 2))
            {
                int scanNumber = myMsDataFile.GetClosestOneBasedSpectrumNumber(rt);
                scanNumbers.Add(scanNumber);
                // NOTE(review): scanNumber + 1 is requested without a visible upper-bound check;
                // confirm what GetOneBasedScan does when scanNumber is the last scan in the file.
                rt = myMsDataFile.GetOneBasedScan(scanNumber + 1).RetentionTime;
            }

            // Use the charge of the component's most intense charge state, unless the component
            // has exactly one charge state, in which case the hit's own charge is kept.
            proteinCharge = matching_component.charge_states.Count == 1
                ? identification.charge
                : matching_component.charge_states.OrderByDescending(c => c.intensity).First().charge_count;
        }

        var formula = identification.GetChemicalFormula();
        if (formula == null)
        {
            continue;
        }

        // Calculate the isotopic distribution for the identification's chemical formula.
        var dist = IsotopicDistribution.GetDistribution(formula, 0.1, 0.001);
        double[] masses = dist.Masses.ToArray();
        double[] intensities = dist.Intensities.ToArray();

        // Sort intensities in descending order, carrying the masses along with them.
        Array.Sort(intensities, masses, Comparer<double>.Create((x, y) => y.CompareTo(x)));

        List<int> scansAdded = new List<int>();
        foreach (int scanNumber in scanNumbers)
        {
            // Search from each scan with direction -1 and then +1.
            res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, -1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
            res.Ms1List.AddRange(SearchMS1Spectra(masses, intensities, scanNumber, 1, scansAdded, peaksAddedFromMS1HashSet, proteinCharge, identification));
        }
    }

    return res;
}