/// <summary>
/// Picks the candidate glycopeptide that best matches this record and copies its
/// identification into the best-match fields.
/// </summary>
/// <remarks>
/// A representative fragmentation event is chosen first: the event with the lowest HCD score
/// below 1. The representative HCD/CID scores are updated from the scanned events and the
/// ETD and CID-sequencing representative scores are reset to 0.
/// Candidates are then restricted to those whose sialylation (numNeuAc &gt; 0) agrees with the
/// representative event, and the one with the smallest ppm difference to <c>_Mass</c> wins,
/// provided that difference is below <paramref name="ppm_tolerance"/>.
/// If no event has an HCD score below 1 the method returns after updating the representative
/// scores and leaves the best-match fields untouched (previously this case indexed the event
/// list with -1 and threw).
/// </remarks>
/// <param name="ppm_tolerance">Exclusive upper bound on the ppm difference of an acceptable candidate.</param>
/// <param name="look_in_clustered">
/// true: scan <c>_ClusterRepFragEvents</c>; false: scan every fragmentation event of every
/// associated UMC record.
/// </param>
public void SetIdsBasedOnMassAndGlycanType(double ppm_tolerance, bool look_in_clustered)
{
    double min_hcd = 1;
    double max_cid = 0;
    bool found_event = false;
    FragEvents e = new FragEvents();

    if (look_in_clustered)
    {
        for (int i = 0; i < _ClusterRepFragEvents.Count; i++)
        {
            if (_ClusterRepFragEvents[i].HCDScore < min_hcd)
            {
                min_hcd = _ClusterRepFragEvents[i].HCDScore;
                e = _ClusterRepFragEvents[i];
                found_event = true;
            }
            if (_ClusterRepFragEvents[i].CIDScore > max_cid)
            {
                max_cid = _ClusterRepFragEvents[i].CIDScore;
            }
        }
    }
    else
    {
        for (int i = 0; i < _AssociatedUMCRecords.Count; i++)
        {
            for (int j = 0; j < _AssociatedUMCRecords[i]._AssociatedFragEvents.Count; j++)
            {
                if (_AssociatedUMCRecords[i]._AssociatedFragEvents[j].HCDScore < min_hcd)
                {
                    min_hcd = _AssociatedUMCRecords[i]._AssociatedFragEvents[j].HCDScore;
                    e = _AssociatedUMCRecords[i]._AssociatedFragEvents[j];
                    found_event = true;
                }
                if (_AssociatedUMCRecords[i]._AssociatedFragEvents[j].CIDScore > max_cid)
                {
                    max_cid = _AssociatedUMCRecords[i]._AssociatedFragEvents[j].CIDScore;
                }
            }
        }
    }

    // Representative scores are only overwritten when a usable value was seen.
    if (min_hcd < 1)
        _RepresentativeHCDScore = min_hcd;
    if (max_cid > 0)
        _RepresentativeCIDScore = (int)max_cid;
    _RepresentativeETDScore = 0;
    _RepresentativeCIDSequencingScore = 0;

    // Without a representative event there is nothing to base the glycan-type check on.
    if (!found_event)
        return;

    // Clustered events carry an assigned glycopeptide, so its NeuAc count is consulted first;
    // otherwise (or when that count is 0) the HCD-predicted glycan type decides.
    bool check_sialylated =
        (look_in_clustered && (e.GP_Record.Glycan.numNeuAc > 0))
        || (e.GlycanType == GlypID.enmGlycanType.CS)
        || (e.GlycanType == GlypID.enmGlycanType.HY);

    GlycoFragworkDLL.Utils.Utilities massUtils = new GlycoFragworkDLL.Utils.Utilities();
    int best_index = -1;
    double min_ppm = ppm_tolerance;
    for (int k = 0; k < _CandidateGlycopeptideRecords.Length; k++)
    {
        bool is_sialylated = _CandidateGlycopeptideRecords[k].Glycan.numNeuAc > 0;
        if (check_sialylated != is_sialylated)
            continue;

        double ppm_diff = massUtils.CalculateDelMassPPM(_CandidateGlycopeptideRecords[k].GP_Mono_Mass, _Mass);
        if (ppm_diff < min_ppm)
        {
            best_index = k;
            min_ppm = ppm_diff;
        }
    }

    if (best_index > -1)
    {
        _BestMatchProteinName = _CandidateGlycopeptideRecords[best_index].Sequence.proteinName;
        _BestMatchPeptideSeq = _CandidateGlycopeptideRecords[best_index].Sequence.sequence;
        _BestMatchNGlycoSite = _CandidateGlycopeptideRecords[best_index].Sequence.nGlycoSite;
        _BestMatchGlycanComposition = _CandidateGlycopeptideRecords[best_index].Glycan.composition;
        _BestMatchGlycanMass = _CandidateGlycopeptideRecords[best_index].GlycanMonoMass;
        _BestMatchPeptideMass = _CandidateGlycopeptideRecords[best_index].SequenceMonoMass;
        _BestMatchFalseHit = _CandidateGlycopeptideRecords[best_index].IsDecoy;
        _BestMatchParentScanTime = e.ParentScanTime;
        _BestMatchParentMz = e.ParentMz;

        // Peak arrays are only taken over when they hold more than one entry.
        if (e.CIDPeaks.Length > 1)
        {
            _RepresentativeCIDPeaks = e.CIDPeaks;
        }
        if (e.HCDPeaks.Length > 1)
        {
            _RepresentativeHCDPeaks = e.HCDPeaks;
        }
    }
}
/// <summary>
/// Copy constructor: initialises this event from <paramref name="e"/> field by field.
/// </summary>
/// <remarks>
/// Every field is copied by plain assignment, so any reference-typed field ends up shared
/// with <paramref name="e"/> rather than duplicated.
/// No per-activation parent m/z is copied; only the single <c>_parent_mz</c> is.
/// </remarks>
/// <param name="e">The event to copy from.</param>
public FragEvents(FragEvents e)
{
    // Identity, precursor and assignment information
    this._id = e._id;
    this._TransformResult = e._TransformResult;
    this._parent_mz = e._parent_mz;
    this._parent_scan = e._parent_scan;
    this._parent_scan_time = e._parent_scan_time;
    this._gp_record = e._gp_record;
    this._glycanType = e._glycanType;
    this._fdr = e._fdr;
    this._FalseHit = e._FalseHit;

    // CID
    this._cid_scan = e._cid_scan;
    this._cid_mz_values = e._cid_mz_values;
    this._cid_intensity_values = e._cid_intensity_values;
    this._CIDPeaks = e._CIDPeaks;
    this._cidProfileType = e._cidProfileType;
    this._cidScore = e._cidScore;
    this._cid_sequencing_score = e._cid_sequencing_score;

    // HCD
    this._hcd_scan = e._hcd_scan;
    this._hcd_mz_values = e._hcd_mz_values;
    this._hcd_intensity_values = e._hcd_intensity_values;
    this._HCDPeaks = e._HCDPeaks;
    this._hcdProfileType = e._hcdProfileType;
    this._hcdScore = e._hcdScore;

    // ETD
    this._etd_scan = e._etd_scan;
    this._etd_mz_values = e._etd_mz_values;
    this._etd_intensity_values = e._etd_intensity_values;
    this._ETDPeaks = e._ETDPeaks;
    this._etdProfileType = e._etdProfileType;
    this._etdScore = e._etdScore;
}
/* public void CalculateCIDRepresentativeFragmentationSpectra(ref Classes.UMCRecord _u, ref Classes.Params _params)
{
    Classes.UMCRecord _tempU = new UMCRecord();
    int spectra_num = 0;
    string cum_spectra_names = null;
    List<string> cluster_names = new List<string>();
    List<int> orphan_ids = new List<int>();
    for (int i = 0; i < _u._AssociatedFragEvents.Count; i++)
    {
        if (_u._AssociatedFragEvents[i].CIDPeaks[0] != null)
        {
            string spectra_name = "Spectra_" + _u.DatasetID + "_" + spectra_num + "_" + _u._AssociatedFragEvents[i].CIDScan;
            GlypID.Peaks.clsPeak[] thisCIDPeaks = new GlypID.Peaks.clsPeak[_u._AssociatedFragEvents[i].CIDPeaks.Length];
            Array.Copy(_u._AssociatedFragEvents[i].CIDPeaks, thisCIDPeaks, _u._AssociatedFragEvents[i].CIDPeaks.Length);
            _CIDSpectralUtilities.AddPeaksToList(ref thisCIDPeaks, spectra_name);
            spectra_num++;
            if (cum_spectra_names != null)
                cum_spectra_names = cum_spectra_names + "-" + spectra_name;
            else
                cum_spectra_names = spectra_name;
        }
        else
        {
            // TO check
            // This happens which means there was no fragmentation event then (or) HCD score was bad
            if (_u._AssociatedFragEvents[i].HCDScore < 1)
                orphan_ids.Add(_u.DatasetID);
        }
    }
}*/

/// <summary>
/// Assigns a false-discovery-rate estimate to every cluster-representative fragmentation
/// event that has a positive ETD score.
/// </summary>
/// <remarks>
/// Pass 1 appends the ETD score of each such event to the map's decoy list
/// (<c>_AllFalseHitsFDRScore</c>) or target list (<c>_AllTrueHitsFDRScore</c>), depending on
/// <c>GP_Record.IsDecoy</c>. The lists are appended to, not cleared.
/// Pass 2 sets each event's FDR to (decoy scores strictly above the event's score) divided by
/// (target scores strictly above it); a zero target count is treated as 1.
/// </remarks>
/// <param name="_glycoMap">Map whose records are scored; its two FDR score lists are extended.</param>
/// <param name="_params">Currently unused. TODO from the original code: select a combined score based on these parameters.</param>
public void AssignFDR(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
{
    // Pass 1: collect decoy and target ETD score distributions.
    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
        for (int j = 0; j < m._ClusterRepFragEvents.Count; j++)
        {
            FragEvents scored = m._ClusterRepFragEvents[j];
            if (scored.ETDScore > 0)
            {
                if (scored.GP_Record.IsDecoy)
                    _glycoMap._AllFalseHitsFDRScore.Add(scored.ETDScore);
                else
                    _glycoMap._AllTrueHitsFDRScore.Add(scored.ETDScore);
            }
        }
    }

    // Pass 2: FDR of an event = decoys scoring above it / targets scoring above it.
    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
        for (int j = 0; j < m._ClusterRepFragEvents.Count; j++)
        {
            FragEvents e = m._ClusterRepFragEvents[j];
            if (e.ETDScore > 0)
            {
                int num_false_hits = 0;
                int num_true_hits = 0;
                foreach (double score in _glycoMap._AllFalseHitsFDRScore)
                {
                    if (score > e.ETDScore)
                        num_false_hits++;
                }
                foreach (double score in _glycoMap._AllTrueHitsFDRScore)
                {
                    if (score > e.ETDScore)
                        num_true_hits++;
                }

                // The best-scoring target has no targets above it; avoid dividing by zero.
                if (num_true_hits == 0)
                    num_true_hits = 1;

                e.FDR = (float)num_false_hits / num_true_hits;
            }
        }
    }
}
/// <summary>
/// Search all glycopeptide records for a particular mass
/// </summary>
/// <param name="mass">Mass to be searched for</param>
/// <param name="matched_gps">List of GPs that matched</param>
/// <param name="check_only_sialylated">Simple filtering rule</param>
/// <returns>TRUE/FALSE found at least one match</returns>
/* deprecated nov 2012
public bool SearchGlycopeptides(double mass, ref List <GlycopeptideRecord> matched_gps, bool check_only_sialylated)
{
    bool found_match = false;
    matched_gps = new List<GlycopeptideRecord>();
    double min_ppm = _PPM_Diff;
    foreach (GlycopeptideRecord g in _glycopeptides)
    {
        bool proceed = false;
        if (check_only_sialylated)
        {
            string comp = g.Glycan.composition;
            if (g.Glycan.GlycanCompositionHasNeuAC(comp))
                proceed = true;
        }
        else
            proceed = true;
        if (proceed)
        {
            if (_utils.CalculateDelMassPPM(g.GP_Mass, mass) < min_ppm)
            {
                found_match = true;
                matched_gps.Add(g);
            }
        }
        if (g.GP_Mass > mass + 0.5)
            break;
    }
    return found_match;
}*/
/// <summary>
/// Function to Load all fragmentation spectra for glycopeptide ions into the map.
/// </summary>
/// <remarks>
/// For every record in the map, each associated UMC is walked scan by scan. MSn scans whose
/// precursor is consistent with the record mass are grouped into fragmentation events (one per
/// run of identical precursor m/z), the precursor is deisotoped, and the CID / HCD / ETD scans
/// are scored into the current event. Events whose HCD score ended below 1 replace the UMC's
/// <c>_AssociatedFragEvents</c>. Records for which no HCD scan was accepted are removed from the map.
/// </remarks>
/// <param name="_glycoMap">The map whose records are processed and, where nothing scored, removed.</param>
/// <param name="_parameters">Run parameters: glycopeptide filtering, decoy use, GP-file options and the UMC coverage limit.</param>
/// <param name="glycanListFile">The glycan list file (used when no glycopeptide file is supplied).</param>
/// <param name="fastaFile">The FASTA file (used when no glycopeptide file is supplied).</param>
/// <param name="gpFile">A file of precomputed glycopeptides (used when <c>UseGlycoPeptideFile</c> is set).</param>
public void LoadFragmentationSpectraIntoMap(ref Classes.MapRecord _glycoMap, ref Classes.Params _parameters, string glycanListFile, string fastaFile, string gpFile)
{
    // Copied into locals because the anonymous method below cannot capture ref parameters.
    List<string> DatasetNames = _glycoMap._AssociatedDatasetNames;
    List<bool> isCID = _glycoMap._IsCID;
    List<bool> isHCD = _glycoMap._IsHCD;
    List<bool> isETD = _glycoMap._IsETD;
    bool filter_glycopeptides = _parameters.ProcessOnlyNGlycopeptides;
    double max_coverage = _parameters.MaxUMCCoverage;

    GlypID.Sequence.clsSequence[] sequences = new GlypID.Sequence.clsSequence[1];
    GlypID.Glycan.clsGlycan[] glycans = new GlypID.Glycan.clsGlycan[1];
    GlypID.Glycopeptide.clsGlycopeptide GlycoPeptide = new GlypID.Glycopeptide.clsGlycopeptide();
    Classes.MapRecord _tMap = new MapRecord();

    // -- Get Glycopeptide List --//
    if (_parameters.ProcessOnlyNGlycopeptides)
    {
        if (!_parameters.UseGlycoPeptideFile)
        {
            Console.WriteLine("Reading Glycan List");
            try
            {
                GlycoPeptide.LoadGlycansFromList(glycanListFile, ref glycans, _parameters.UseDecoyGlycan);
            }
            catch (Exception ex)
            {
                // A glycan list that cannot be read aborts the whole load.
                System.Console.WriteLine(ex.Message);
                return;
            }
            Console.WriteLine("Reading Fasta File");
            GlycoPeptide.LoadNGlycopeptidesFromFasta(fastaFile, ref sequences, _parameters.UseDecoyPeptide);
            Console.WriteLine("Setting Glycopeptides");
            SetGlycoPeptides(ref sequences, ref glycans); // slightly inflated
            if (_parameters.CreateGlycoPeptideFile)
            {
                int last_pos2 = fastaFile.LastIndexOf(".");
                string outputgpfile = fastaFile.Substring(0, last_pos2) + "_GPInfo_v2.csv";
                Console.WriteLine("Writing to GP File");
                WriteOutGlycopeptidesToFile(outputgpfile);
            }
            Console.WriteLine("Creating Hash");
            CreateHashTableGlycopeptides(_glycoMap.MapMinMass - 1, _glycoMap.MapMaxMass + 1, true);
        }
        else
        {
            Console.WriteLine("Reading in from GP file");
            LoadGlycopeptidesFromFile(gpFile, _glycoMap.MapMinMass - 1, _glycoMap.MapMaxMass + 1, true);
            Console.WriteLine("Creating Hash");
            //CreateHashTableGlycopeptides(_glycoMap.MapMinMass - 1, _glycoMap.MapMaxMass + 1, false);
        }
    }

    Console.WriteLine("Loading Frag events and scoring");
    _glycopeptides.Clear();

    _glycoMap._AllMLNRecords.ForEach(delegate(MultiAlignRecord m)
    {
        bool process_mrecord = false;
        Console.WriteLine("UMC = " + m.ID);

        // Only load in those ions that match the glycopeptides list
        if (filter_glycopeptides)
        {
            List<GlycopeptideRecord> candidate_gps = new List<GlycopeptideRecord>();
            process_mrecord = SearchGlycoPeptidesDictionary(m.Mass, ref candidate_gps, false);
            if (process_mrecord)
            {
                m._CandidateGlycopeptideRecords = new GlycopeptideRecord[candidate_gps.Count];
                for (int i = 0; i < candidate_gps.Count; i++)
                {
                    m._CandidateGlycopeptideRecords[i] = candidate_gps[i];
                }
            }
        }
        else
        {
            process_mrecord = true;
        }

        if (process_mrecord)
        {
            bool store_record = false; // This determines if the record needs to be stored

            for (int i = 0; i < m._AssociatedUMCRecords.Count; i++)
            {
                // -- Start processing each umc -- //
                UMCRecord u = m._AssociatedUMCRecords[i];
                if (u.ScanRep == 0)
                    continue;

                int datasetid = u.DatasetID;
                // NOTE(review): in the source as received the per-dataset open below and the
                // matching Close() further down are commented out, which leaves DatasetNames
                // unused here. Confirm where _RawData is pointed at the right dataset.
                // _RawData = new GlypID.Readers.clsRawData(DatasetNames[datasetid], GlypID.Readers.FileType.FINNIGAN);

                // Skip UMCs that span too large a fraction of the run.
                double scanrange = Convert.ToDouble(u.ScanEnd - u.ScanStart);
                int numscans = _RawData.GetNumScans();
                double coverage = scanrange / numscans;
                if (coverage >= max_coverage)
                {
                    continue;
                }

                // umc has passed filtering conditions so start processing each scan in umc
                double min_ppm = 50;

                // Process each MSn scan from start to stop of UMC
                List<FragEvents> allobserved_frag_events = new List<FragEvents>();
                Classes.FragEvents e = new FragEvents();
                double most_recent_precursor = 0;
                int frag_id = 0;

                for (int scan = u.ScanStart; scan <= u.ScanEnd; scan++)
                {
                    if (_RawData.IsMSScan(scan))
                        continue;

                    // Start processing MSn scan
                    bool record_scan = false;

                    // get parent
                    double parent_mz = _RawData.GetParentMz(scan);
                    short header_cs = (short)_RawData.GetParentChargeFromHeader(scan);
                    if (!PrecursorMatchesRecordMass(parent_mz, header_cs, m))
                        continue;

                    // Scan has passed ALL filters
                    float[] msms_mzs = new float[1];
                    float[] msms_intensities = new float[1];
                    _RawData.GetRawData(scan, ref msms_mzs, ref msms_intensities);
                    if (_RawData.IsProfileScan(scan))
                        _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.PROFILE;
                    else
                        _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;

                    if (most_recent_precursor != parent_mz)
                    {
                        // Only deisotope if parent has not been observed
                        // Store the previous Fragmentation event
                        if (e.HCDScore < 1)
                        {
                            allobserved_frag_events.Add(e);
                        }

                        // Start over with a fresh event for the new precursor
                        e = new FragEvents();
                        e.ID = frag_id;
                        frag_id++;
                        most_recent_precursor = parent_mz;

                        float[] parent_mzs = new float[1];
                        float[] parent_intensities = new float[1];
                        Reset();

                        // ** ---- deisotope the precursor -- ** //
                        int parent_scan = _RawData.GetParentScan(scan);
                        double parent_scan_time = _RawData.GetScanTime(parent_scan);
                        //TODO
                        _RawData.GetRawData(parent_scan, ref parent_mzs, ref parent_intensities);

                        // Do peak finding
                        if (_RawData.IsProfileScan(parent_scan))
                            _ParentPeakProcessor.ProfileType = GlypID.enmProfileType.PROFILE;
                        else
                            _ParentPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;
                        double thresh = GlypID.Utils.GetAverage(ref parent_intensities, float.MaxValue);
                        double background_intensity = GlypID.Utils.GetAverage(ref parent_intensities, (float)(5 * thresh));
                        _ParentPeakProcessor.SetPeakIntensityThreshold(background_intensity * _ParentPeakProcessorParams.PeakBackgroundRatio);
                        _ParentPeakProcessor.DiscoverPeaks(ref parent_mzs, ref parent_intensities, ref _ParentPeaks,
                            Convert.ToSingle(_TransformParameters.MinMZ), Convert.ToSingle(_TransformParameters.MaxMZ), true);

                        // Pick out precursor and transform
                        double pep_intensity;
                        if (_TransformParameters.UseAbsolutePeptideIntensity)
                            pep_intensity = _TransformParameters.AbsolutePeptideIntensity;
                        else
                            pep_intensity = background_intensity * _TransformParameters.PeptideMinBackgroundRatio;

                        bool found = false;
                        if (_RawData.IsFTScan(parent_scan))
                        {
                            found = _HornTransform.FindPrecursorTransform(Convert.ToSingle(background_intensity), Convert.ToSingle(pep_intensity),
                                ref parent_mzs, ref parent_intensities, ref _ParentPeaks, Convert.ToSingle(parent_mz), ref _ParentTransformResults);
                        }

                        if (!found)
                        {
                            // Low resolution data or bad high res spectra: fall back to the
                            // charge (and mono m/z, if any) recorded in the scan header.
                            if (header_cs > 0)
                            {
                                double mono_mz = _RawData.GetParentMonoMzFromHeader(scan);
                                if (mono_mz == 0)
                                    mono_mz = parent_mz;

                                short[] charges = new short[1];
                                charges[0] = header_cs;
                                _HornTransform.AllocateValuesToTransform(Convert.ToSingle(mono_mz), 0, ref charges, ref _ParentTransformResults);
                                found = true;
                                record_scan = true;
                                e.TransformResult = _ParentTransformResults[0];
                                e.ParentMz = parent_mz;
                                e.ParentScan = parent_scan;
                                e.ParentScanTime = parent_scan_time;
                            }
                        }
                        else
                        {
                            // Deisotoped precursor must agree with the record mass within min_ppm.
                            double ppm = _utils.CalculateDelMassPPM(m.Mass, _ParentTransformResults[0].mdbl_mono_mw);
                            if ((ppm <= min_ppm) || (Math.Abs(ppm - min_ppm) < 1E-05))
                            {
                                record_scan = true;
                                e.TransformResult = _ParentTransformResults[0];
                                e.ParentMz = parent_mz;
                                e.ParentScan = parent_scan;
                                e.ParentScanTime = parent_scan_time;
                            }
                        }
                    }
                    else
                    {
                        if (e.TransformResult.mdbl_mono_mw > 0)
                        {
                            // Has been deisotoped successfully
                            record_scan = true;
                        }
                    }

                    // -------- Process scan according to type -------- //
                    if (isCID[datasetid] && _RawData.IsCIDScan(scan) && record_scan)
                    {
                        // CID scan
                        _MSMSPeakProcessor.SetPeakIntensityThreshold(0); // Since yin takes top 20 on his own.
                        _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks,
                            Convert.ToSingle(_TransformParameters.MinMZ), Convert.ToSingle(_TransformParameters.MaxMZ), false);
                        if (_MSMSPeaks.Length > 0)
                        {
                            // Scoring
                            GlypID.CIDScoring.clsCIDScoringScanResults[] tCIDScoreResults = new GlypID.CIDScoring.clsCIDScoringScanResults[1];
                            bool found_score = _CIDScoring.ScoreCIDSpectra(ref _MSMSPeaks, ref msms_mzs, ref msms_intensities, ref _ParentTransformResults, ref tCIDScoreResults);

                            // && (not &): the result entry must not be read when scoring failed.
                            if (found_score && tCIDScoreResults[0].mdbl_cid_score > e.CIDScore)
                            {
                                e.CIDScan = scan;
                                e.CIDMzs = msms_mzs;
                                e.CIDIntensities = msms_intensities;
                                e.CIDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                e.CIDProfileType = _MSMSPeakProcessor.ProfileType;
                                e.CIDScore = tCIDScoreResults[0].mdbl_cid_score;
                                Array.Copy(_MSMSPeaks, e.CIDPeaks, _MSMSPeaks.Length);
                            }
                        }
                    }

                    if (isHCD[datasetid] && _RawData.IsHCDScan(scan) && record_scan)
                    {
                        // HCD Scan
                        double hcd_background_intensity = GlypID.Utils.GetAverage(ref msms_intensities, ref msms_mzs,
                            Convert.ToSingle(_ScoringParameters.MinHCDMz), Convert.ToSingle(_ScoringParameters.MaxHCDMz));
                        _MSMSPeakProcessor.SetPeakIntensityThreshold(hcd_background_intensity);
                        _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks,
                            Convert.ToSingle(_ScoringParameters.MinHCDMz), Convert.ToSingle(_ScoringParameters.MaxHCDMz), false);
                        if (_MSMSPeaks.Length > 0)
                        {
                            // Score
                            GlypID.HCDScoring.clsHCDScoringScanResults[] tHCDScoreResults = new GlypID.HCDScoring.clsHCDScoringScanResults[1];
                            double score = _HCDScoring.ScoreHCDSpectra(ref _MSMSPeaks, ref msms_mzs, ref msms_intensities, ref _ParentTransformResults, ref tHCDScoreResults);

                            // This makes sure that within frag events of the same parent, the lowest one is chosen
                            if (score < e.HCDScore)
                            {
                                // Store
                                store_record = true;
                                e.HCDScan = scan;
                                e.HCDMzs = msms_mzs;
                                e.HCDIntensities = msms_intensities;
                                e.HCDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                e.HCDProfileType = _MSMSPeakProcessor.ProfileType;
                                Array.Copy(_MSMSPeaks, e.HCDPeaks, _MSMSPeaks.Length);
                                e.HCDScore = tHCDScoreResults[0].mdbl_hcd_score;
                                e.GlycanType = (GlypID.enmGlycanType)tHCDScoreResults[0].menm_glycan_type;
                            }
                        }
                    }

                    if (isETD[datasetid] && _RawData.IsETDScan(scan) && record_scan)
                    {
                        // ETD Scan
                        _MSMSPeakProcessor.SetPeakIntensityThreshold(0);
                        _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;
                        _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks, 0, Convert.ToSingle(parent_mz), false);

                        // ETD is only scored for events that already have an accepted HCD score.
                        if ((_MSMSPeaks.Length > 1) && (e.HCDScore < 1))
                        {
                            // Score
                            int best_scoring_index = -1;
                            double max_score = 0;
                            for (int k = 0; k < m._CandidateGlycopeptideRecords.Length; k++)
                            {
                                // Confirm glycan type matches with HCD prediction
                                bool process_result;
                                if ((e.GlycanType == GlypID.enmGlycanType.CS) || e.GlycanType == GlypID.enmGlycanType.HY)
                                {
                                    process_result = m._CandidateGlycopeptideRecords[k].Glycan.numNeuAc > 0;
                                }
                                else
                                {
                                    process_result = true;
                                }

                                // Stupidly planned type conversion ; Just doing the bare minimum
                                GlypID.ETDScoring.clsETDScoringScanResults thisResult = new GlypID.ETDScoring.clsETDScoringScanResults();
                                thisResult.mdbl_parent_mz = e.ParentMz;
                                thisResult.mdbl_mono_mw = e.TransformResult.mdbl_mono_mw;
                                thisResult.mshort_cs = e.TransformResult.mshort_cs;
                                thisResult.mstr_glyco_site = m._CandidateGlycopeptideRecords[k].Sequence.nGlycoSite;
                                thisResult.mstr_nglyco_site = m._CandidateGlycopeptideRecords[k].Sequence.nGlycoSite;
                                thisResult.mstr_pep_seq = m._CandidateGlycopeptideRecords[k].Sequence.sequence;
                                thisResult.mdbl_glycan_mass = m._CandidateGlycopeptideRecords[k].GlycanMonoMass;

                                if (thisResult.mstr_pep_seq != "" && process_result)
                                {
                                    double etd_score = _ETDScoring.ScoreETDSpectra(ref _MSMSPeaks, thisResult);
                                    if (etd_score >= max_score)
                                    {
                                        thisResult.mdbl_etd_score = etd_score;
                                        best_scoring_index = k;
                                        max_score = etd_score;
                                    }
                                }
                            }

                            // Store
                            if ((best_scoring_index != -1) && (max_score > e.ETDScore))
                            {
                                e.ETDScan = scan;
                                e.ETDMzs = msms_mzs;
                                e.ETDIntensities = msms_intensities;
                                e.ETDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                Array.Copy(_MSMSPeaks, e.ETDPeaks, _MSMSPeaks.Length);
                                e.ETDScore = max_score;
                                e.GP_Record = m._CandidateGlycopeptideRecords[best_scoring_index];
                                e.FalseHit = e.GP_Record.IsDecoy;
                            }
                        }
                    }
                }

                // Store the most recent fragmentation event
                if (e.HCDScore < 1)
                {
                    allobserved_frag_events.Add(e);
                }

                // _RawData.Close();

                // Replace the UMC's events with the ones observed in this pass.
                m._AssociatedUMCRecords[i]._AssociatedFragEvents.Clear();
                for (int kk = 0; kk < allobserved_frag_events.Count; kk++)
                {
                    m._AssociatedUMCRecords[i]._AssociatedFragEvents.Add(allobserved_frag_events[kk]);
                }
                m._AssociatedUMCRecords[i].SetRepScores();
            }

            if (store_record)
                _tMap.AddRecord(m);
        }
    });

    // Only rebuild the map when at least one record was dropped.
    if (_tMap._AllMLNRecords.Count != _glycoMap._AllMLNRecords.Count)
    {
        _glycoMap.ClearRecords();
        for (int k = 0; k < _tMap._AllMLNRecords.Count; k++)
        {
            _glycoMap.AddRecord(_tMap._AllMLNRecords[k]);
        }
    }
    _tMap.ClearRecords();
}

/// <summary>
/// Tells whether a precursor m/z is consistent with the mass of a record.
/// When the scan header carries a charge it is used as is; otherwise charges 2 through 7
/// are each tried. A charge matches when the resulting mass is within <c>_DA_Diff</c> Da of the record mass.
/// </summary>
private bool PrecursorMatchesRecordMass(double parent_mz, short header_cs, MultiAlignRecord m)
{
    if (header_cs > 0)
    {
        double header_mass = _utils.CalculateMass(parent_mz, header_cs);
        return _utils.CalculateDelMassDa(header_mass, m.Mass) < _DA_Diff;
    }

    // No charge in the header: try the default charge states.
    for (short cs = 2; cs <= 7; cs++)
    {
        double trial_mass = _utils.CalculateMass(parent_mz, cs);
        if (_utils.CalculateDelMassDa(trial_mass, m.Mass) < _DA_Diff)
            return true;
    }
    return false;
}
/// <summary>
/// Runs glycan sequencing on the CID spectrum of every cluster-representative fragmentation
/// event that has a positive ETD score.
/// </summary>
/// <remarks>
/// For each such event the Y1 ion (peptide + one HexNAc) is tried at every charge from
/// (precursor charge - 1) down to 1. The first charge for which sequencing returns at least
/// one structure wins: the top structure's score and IUPAC string are stored on the event and
/// its glycopeptide record, and the structure strings are written to
/// "&lt;sequencingFolder&gt;&lt;record id&gt;_&lt;CID scan&gt;.txt".
/// Average masses are used when the event's transform has an average mass, otherwise
/// monoisotopic masses.
/// </remarks>
/// <param name="_glycoMap">Map whose records are sequenced.</param>
/// <param name="_params">Not used by this method.</param>
/// <param name="sequencingFolder">
/// Output folder, concatenated directly with the file name (so it should end with a
/// separator). Null or empty selects <c>c:\sequencing\</c>.
/// </param>
public void SequenceCIDPeaks(ref Classes.MapRecord _glycoMap, ref Classes.Params _params, string sequencingFolder)
{
    // Null is treated like "" so that the default folder is also used for a missing argument.
    if (string.IsNullOrEmpty(sequencingFolder))
        sequencingFolder = @"c:\sequencing\";

    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];

        for (int k = 0; k < m._ClusterRepFragEvents.Count; k++)
        {
            Classes.FragEvents e = m._ClusterRepFragEvents[k];
            if (e.ETDScore > 0)
            {
                float PeptideMass = 0;
                float GlcNAcMass = 0;
                COL.MassLib.MSScan _msScan = new COL.MassLib.MSScan(e.CIDMzs, e.CIDIntensities,
                    Convert.ToSingle(e.TransformResult.mdbl_mz),
                    Convert.ToSingle(e.TransformResult.mdbl_mono_mw),
                    Convert.ToSingle(e.TransformResult.mdbl_average_mw),
                    Convert.ToInt32(e.TransformResult.mshort_cs));

                if (e.TransformResult.mdbl_average_mw > 0)
                {
                    // this means proper deisotoping has occurred so use average composition
                    PeptideMass = Convert.ToSingle(e.GP_Record.SequenceAverageMass);
                    GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanAVGMass(COL.GlycoLib.Glycan.Type.HexNAc);
                }
                else
                {
                    // CS has been assigned, in which case both y1 and precursor should be just mono
                    PeptideMass = Convert.ToSingle(e.GP_Record.SequenceMonoMass);
                    GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanMass(COL.GlycoLib.Glycan.Type.HexNAc);
                }

                short y1cs = e.TransformResult.mshort_cs;
                y1cs--;
                while (y1cs > 0)
                {
                    float y1Mz = ((PeptideMass + GlcNAcMass) + (float)_utils._CC_MASS * y1cs) / y1cs;

                    COL.GlycoSequence.GlycanSequencing _Gs = new COL.GlycoSequence.GlycanSequencing(_msScan, y1Mz, y1cs,
                        e.GP_Record.Glycan.numHex, e.GP_Record.Glycan.numHexNAc, e.GP_Record.Glycan.numDeHex, e.GP_Record.Glycan.numNeuAc,
                        0, sequencingFolder, true, 0.8f, 60);
                    _Gs.NumbersOfPeaksForSequencing = 140;
                    _Gs.CreatePrecursotMZ = true;
                    _Gs.RewardForCompleteStructure = 3;
                    if (e.TransformResult.mdbl_average_mw > 0)
                        _Gs.UseAVGMass = true;
                    else
                        _Gs.UseAVGMass = false;

                    int structure_count = _Gs.StartSequencing();
                    if (structure_count > 0)
                    {
                        List<COL.GlycoLib.GlycanStructure> topstructures = _Gs.GetTopRankScoreStructre(1);
                        e.CIDSequencingScore = topstructures[0].Score;
                        e.GP_Record.GlycanSequence = topstructures[0].IUPACString;

                        // Printing out the sequences
                        string opfile = sequencingFolder + m.ID.ToString() + "_" + e.CIDScan.ToString() + ".txt";
                        Utils.CSVFileHandler cidString = new CSVFileHandler(opfile, CSVFileHandler.WRITE_ONLY);
                        cidString.openFile();
                        try
                        {
                            for (int zz = 0; zz < topstructures.Count; zz++)
                            {
                                cidString.writeLine(topstructures[zz].IUPACString);
                            }
                        }
                        finally
                        {
                            // Release the file even if a write fails.
                            cidString.closeFile();
                        }

                        // The first charge state that yields a structure is kept.
                        break;
                    }
                    y1cs--;
                }
            }
        }
    }
}
/// <summary>
/// Chooses one representative fragmentation event per CID cluster of every record and
/// appends it to the record's <c>_ClusterRepFragEvents</c>.
/// </summary>
/// <remarks>
/// A cluster name is a '-'-separated list of spectrum names; in each spectrum name the second
/// '_'-separated field is read as the dataset id and the third as the fragmentation-event id.
/// When any dataset is ETD, the representative is the event with the highest ETD score among
/// the cluster's events. Otherwise, and only when <c>ProcessOnlyNGlycopeptides</c> is off, it
/// is the event with the lowest HCD score.
/// A representative is stored when its ETD score is positive or when
/// <c>ProcessOnlyNGlycopeptides</c> is off (in which case it may be a default event).
/// </remarks>
/// <param name="_glycoMap">Map whose records receive cluster representatives.</param>
/// <param name="_params">Supplies <c>ProcessOnlyNGlycopeptides</c>.</param>
public void GetRepresentatives(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
{
    bool use_etd = _glycoMap._IsETD.Contains(true);

    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
        int num_clusters = m._ClusterNames.Count;

        for (int c = 0; c < num_clusters; c++)
        {
            string[] spectra = m._ClusterNames[c].Split('-');

            // dataset id -> ids of the fragmentation events of that dataset in this cluster.
            // Each dataset gets its own list; sharing one list between keys would make every
            // key see the same (last) set of ids.
            Dictionary<int, List<int>> umcs_frag_ids = new Dictionary<int, List<int>>();
            foreach (string s in spectra)
            {
                string[] parts = s.Split('_');
                int umc_id = Convert.ToInt32(parts[1]);
                int frag_id = Convert.ToInt32(parts[2]);

                List<int> ids_for_umc;
                if (!umcs_frag_ids.TryGetValue(umc_id, out ids_for_umc))
                {
                    ids_for_umc = new List<int>();
                    umcs_frag_ids.Add(umc_id, ids_for_umc);
                }
                ids_for_umc.Add(frag_id);
            }

            double max_etd_score = 0;
            double min_hcd_score = double.MaxValue; // first HCD candidate is always accepted
            FragEvents maxFragEvent = new FragEvents();

            for (int j = 0; j < m._AssociatedUMCRecords.Count; j++)
            {
                UMCRecord umc = m._AssociatedUMCRecords[j];
                List<int> ids_to_look;
                if (!umcs_frag_ids.TryGetValue(umc.DatasetID, out ids_to_look))
                    continue;

                if (use_etd)
                {
                    FragEvents tempE = umc.FragEventWithHighestETDScore(true, ids_to_look);
                    if (tempE.ETDScore > max_etd_score)
                    {
                        // Track the running maximum so the highest score wins, not the last positive one.
                        max_etd_score = tempE.ETDScore;
                        maxFragEvent = tempE;
                    }
                }
                else if (!_params.ProcessOnlyNGlycopeptides)
                {
                    FragEvents tempE = umc.FragEventWithLowestHCDScore(ids_to_look);
                    if (tempE.HCDScore < min_hcd_score)
                    {
                        min_hcd_score = tempE.HCDScore;
                        maxFragEvent = tempE;
                    }
                }
            }

            if ((maxFragEvent.ETDScore > 0) || (!_params.ProcessOnlyNGlycopeptides))
                m._ClusterRepFragEvents.Add(maxFragEvent);
        }
    }
}
/// <summary>
/// Function to cluster CID spectra in each record.
/// </summary>
/// <remarks>
/// For every record and every charge state between the record's minimum and maximum observed
/// charge, the CID peak lists of all fragmentation events with that charge are handed to
/// <c>_CIDSpectralUtilities</c> under the name
/// "Spectra_&lt;dataset id&gt;_&lt;event id&gt;_&lt;running number&gt;_&lt;CID scan&gt;", clustered, and
/// the resulting cluster names are appended to the record's <c>_ClusterNames</c>.
/// Events without CID peaks (null, empty, or first entry null) are skipped.
/// Afterwards the map's records are cleared and re-added in the same order.
/// </remarks>
/// <param name="_glycoMap">Map whose records are clustered.</param>
/// <param name="_params">Not used by this method.</param>
public void ClusterRecordsOnCID(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
{
    Classes.MapRecord _tempMap = new MapRecord();
    _tempMap._AssociatedDatasetNames = _glycoMap._AssociatedDatasetNames;
    _tempMap._IsCID = _glycoMap._IsCID;
    _tempMap._IsETD = _glycoMap._IsETD;
    _tempMap._IsHCD = _glycoMap._IsHCD;

    for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
    {
        MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
        int num_records = m._AssociatedUMCRecords.Count;
        short mincs = m.MinChargeStateObserved();
        short maxcs = m.MaxChargeStateObserved();

        for (short thiscs = mincs; thiscs <= maxcs; thiscs++)
        {
            int spectra_num = 0;
            List<string> cluster_names = new List<string>();

            for (int j = 0; j < num_records; j++)
            {
                UMCRecord umc = m._AssociatedUMCRecords[j];
                for (int k = 0; k < umc._AssociatedFragEvents.Count; k++)
                {
                    FragEvents fragEvent = umc._AssociatedFragEvents[k];
                    GlypID.Peaks.clsPeak[] cidPeaks = fragEvent.CIDPeaks;

                    // No usable CID spectrum on this event.
                    if (cidPeaks == null || cidPeaks.Length == 0)
                        continue;

                    if ((cidPeaks[0] != null) && (fragEvent.TransformResult.mshort_cs == thiscs))
                    {
                        string spectra_name = "Spectra_" + umc.DatasetID + "_" + fragEvent.ID + "_" + spectra_num + "_" + fragEvent.CIDScan;

                        // Hand over a copy so the event's own peak array is not passed by ref.
                        GlypID.Peaks.clsPeak[] thisCIDPeaks = new GlypID.Peaks.clsPeak[cidPeaks.Length];
                        Array.Copy(cidPeaks, thisCIDPeaks, cidPeaks.Length);
                        _CIDSpectralUtilities.AddPeaksToList(ref thisCIDPeaks, spectra_name);
                        spectra_num++;
                    }
                }
            }

            // More than one cluster for a charge state indicates glycoforms.
            int num_clusters = _CIDSpectralUtilities.ClusterSpectraInList();
            _CIDSpectralUtilities.GetClusterNames(ref cluster_names);
            for (int c = 0; c < num_clusters; c++)
            {
                m._ClusterNames.Add(cluster_names[c]);
            }
            _CIDSpectralUtilities.Clear();
        }

        _tempMap.AddRecord(m);
    }

    // Restore them.
    _glycoMap.ClearRecords();
    for (int i = 0; i < _tempMap._AllMLNRecords.Count; i++)
    {
        _glycoMap.AddRecord(_tempMap._AllMLNRecords[i]);
    }
}
/// <summary>
/// Returns the associated fragmentation event with the lowest HCD score among the events
/// whose ID is listed in <paramref name="IdsToLook"/>.
/// </summary>
/// <remarks>
/// Only events with an HCD score of at most 1 are considered. When two eligible events tie on
/// HCD score, the one with the higher CID score is preferred. If nothing qualifies, a
/// default-constructed event is returned.
/// </remarks>
/// <param name="IdsToLook">IDs of the events that may be selected.</param>
/// <returns>The selected event, or a new default <c>FragEvents</c> when none qualifies.</returns>
public FragEvents FragEventWithLowestHCDScore(List<int> IdsToLook)
{
    FragEvents best = new FragEvents();
    double lowestHcd = 1;
    double cidOfBest = 0;

    foreach (FragEvents candidate in _AssociatedFragEvents)
    {
        // The score test comes first; the ID list is only consulted for events that pass it.
        bool eligible = (candidate.HCDScore <= lowestHcd) && IdsToLook.Contains(candidate.ID);
        if (!eligible)
            continue;

        // Strictly better HCD score always wins; on a tie the CID score decides.
        if (candidate.HCDScore < lowestHcd || candidate.CIDScore > cidOfBest)
        {
            best = candidate;
            lowestHcd = candidate.HCDScore;
            cidOfBest = candidate.CIDScore;
        }
    }

    return best;
}
/// <summary>
/// Returns the fragmentation event with the highest positive ETD score.
/// </summary>
/// <param name="use_associated">
/// true: search <c>_AssociatedFragEvents</c>; false: search <c>_ClusteredFragEvents</c>.
/// </param>
/// <returns>
/// The best-scoring event from the list that was searched, or a new default
/// <c>FragEvents</c> when no event has an ETD score above 0.
/// </returns>
public FragEvents FragEventWithHighestETDScore(bool use_associated)
{
    FragEvents t = new FragEvents();
    double max_etd_score = 0;

    if (use_associated)
    {
        for (int i = 0; i < _AssociatedFragEvents.Count; i++)
        {
            if (_AssociatedFragEvents[i].ETDScore > max_etd_score)
            {
                max_etd_score = _AssociatedFragEvents[i].ETDScore;
                t = _AssociatedFragEvents[i];
            }
        }
    }
    else
    {
        for (int i = 0; i < _ClusteredFragEvents.Count; i++)
        {
            if (_ClusteredFragEvents[i].ETDScore > max_etd_score)
            {
                max_etd_score = _ClusteredFragEvents[i].ETDScore;
                // Take the event from the list that was searched; an index found here is
                // not valid for _AssociatedFragEvents.
                t = _ClusteredFragEvents[i];
            }
        }
    }

    return t;
}