Example #1
0
        /// <summary>
        /// Default constructor. Sets <c>_PPMDiff</c> to 20 and <c>_binSize</c> to 0.05, and
        /// default-constructs the map record, the utilities helper, and the scoring and
        /// Horn-transform parameter objects.
        /// </summary>
        public MapParserOld()
        {
            // Scalar defaults.
            _PPMDiff = 20;
            _binSize = 0.05;

            // Default-constructed collaborators (same allocation order as before).
            _GlycoMap = new GlycoFragworkDLL.Classes.MapRecord();
            _utils = new GlycoFragworkDLL.Utils.Utilities();
            _ScoringParameters = new GlypID.Scoring.clsScoringParameters();
            _TransformParameters = new GlypID.HornTransform.clsHornTransformParameters();
        }
Example #2
0
 /// <summary>
 /// Creates a parser that works on an existing map record.
 /// </summary>
 /// <param name="mrecord">Map record stored in <c>_GlycoMap</c> (by reference, not copied).</param>
 /// <param name="score_params">Scoring parameters. Currently not stored; see the note in the body.</param>
 /// <param name="transform_params">Horn-transform parameters. Currently not stored; see the note in the body.</param>
 public MapParserOld(MapRecord mrecord, GlypID.Scoring.clsScoringParameters score_params, GlypID.HornTransform.clsHornTransformParameters transform_params)
 {
     // The previous version allocated a fresh MapRecord here and overwrote it on the
     // very next line; the throw-away allocation has been dropped.
     _GlycoMap = mrecord;

     // NOTE(review): score_params and transform_params are accepted but never used.
     // If the class is expected to score with caller-supplied settings they should be
     // stored on the instance -- confirm the intended behaviour before changing.
 }
Example #3
0
        /// <summary>
        /// Replaces the caller's map with a newly constructed record, reads the MLN (V2) file
        /// into it, and attaches the per-dataset lists supplied by the caller.
        /// </summary>
        /// <param name="_glycoMap">Receives the newly created and populated map record.</param>
        /// <param name="MLNMapFilename">Path of the MLN file passed to the reader.</param>
        /// <param name="DatasetNames">Dataset names; the list's count is passed to the reader as the dataset count.</param>
        /// <param name="isCID">List stored in the map's <c>_IsCID</c>.</param>
        /// <param name="isHCD">List stored in the map's <c>_IsHCD</c>.</param>
        /// <param name="isETD">List stored in the map's <c>_IsETD</c>.</param>
        /// <param name="dataType">List stored in the map's <c>_AssociatedDatasetTypes</c>.</param>
        public void LoadMap(ref Classes.MapRecord _glycoMap, string MLNMapFilename, List<string> DatasetNames,
            List<bool> isCID, List <bool> isHCD, List<bool> isETD, List <string> dataType)
        {
            // Start from an empty record; whatever the caller passed in is discarded.
            _glycoMap = new GlycoFragworkDLL.Classes.MapRecord();

            // Read the MLN file (V2 reader), telling it how many datasets to expect.
            _glycoMap.ReanInMLNV2File(MLNMapFilename, DatasetNames.Count);

            // Attach dataset names, types and fragmentation-mode lists.
            _glycoMap._AssociatedDatasetNames = DatasetNames;
            _glycoMap._AssociatedDatasetTypes = dataType;
            _glycoMap._IsCID = isCID;
            _glycoMap._IsHCD = isHCD;
            _glycoMap._IsETD = isETD;
        }
Example #4
0
        /// <summary>
        /// Searches every record in the map for putative glycopeptide matches based on mass.
        /// Glycans and sequences are loaded once from the supplied files; each record's mass is
        /// then handed to the GlypID glycopeptide search, and the glycan composition, peptide
        /// sequence and protein name of every returned result are copied onto the record.
        /// </summary>
        /// <param name="_glycoMap">Map whose <c>_AllMLNRecords</c> are searched and annotated.</param>
        /// <param name="fastaFile">FASTA file passed to the N-glycopeptide loader.</param>
        /// <param name="glycanListFile">Glycan list file passed to the glycan loader.</param>
        public void SearchMapForNGlycopeptides(ref MapRecord _glycoMap, string fastaFile, string glycanListFile)
        {
            GlypID.Sequence.clsSequence[] sequences = new GlypID.Sequence.clsSequence[1];
            GlypID.Glycan.clsGlycan[] glycans = new GlypID.Glycan.clsGlycan[1];
            GlypID.Glycopeptide.clsGlycopeptide GlycoPeptide = new GlypID.Glycopeptide.clsGlycopeptide();

            // Load the search space: glycans first, then sequences.
            GlycoPeptide.LoadGlycansFromList(glycanListFile, ref glycans);
            GlycoPeptide.LoadNGlycopeptidesFromFasta(fastaFile, ref sequences);

            _glycoMap._AllMLNRecords.ForEach((MultiAlignRecord record) =>
            {
                // Seed a one-element result array carrying only the record's mass.
                GlypID.ETDScoring.clsETDScoringScanResults[] hits = new GlypID.ETDScoring.clsETDScoringScanResults[1];
                hits[0] = new GlypID.ETDScoring.clsETDScoringScanResults();
                hits[0].mdbl_mono_mw = record.Mass;

                // The search receives the array by reference.
                GlycoPeptide.SearchForGlycopeptides(ref hits, ref glycans, ref sequences, GlypID.enmGlycanType.NA, false);

                // Copy every returned hit onto the record, one slot per hit.
                int hitCount = hits.Length;
                record._PeptideSeq = new string[hitCount];
                record._ProteinName = new string[hitCount];
                record._GlycanComposition = new string[hitCount];
                for (int h = 0; h < hitCount; h++)
                {
                    record._GlycanComposition[h] = hits[h].mstr_glycan_composition;
                    record._PeptideSeq[h] = hits[h].mstr_pep_seq;
                    record._ProteinName[h] = hits[h].mstr_pro_seq_name;
                }
            });
        }
Example #5
0
        /// <summary>
        /// Search all glycopeptide records for a particular mass
        /// </summary>
        /// <param name="mass">Mass to be searched for</param>
        /// <param name="matched_gps">List of GPs that matched</param>
        /// <param name="check_only_sialylated">Simple filtering rule</param>
        /// <returns>TRUE/FALSE found at least one match</returns>
        /* deprecated nov 2012 public bool SearchGlycopeptides(double mass, ref  List <GlycopeptideRecord> matched_gps, bool check_only_sialylated)
        {
            bool found_match = false;
            matched_gps = new List<GlycopeptideRecord>();
            double min_ppm = _PPM_Diff;
            foreach (GlycopeptideRecord g in _glycopeptides)
            {
                bool proceed = false;
                if (check_only_sialylated)
                {
                    string comp = g.Glycan.composition;
                    if (g.Glycan.GlycanCompositionHasNeuAC(comp))
                        proceed = true;
                }
                else
                    proceed = true;

                if (proceed)
                {
                    if (_utils.CalculateDelMassPPM(g.GP_Mass, mass) < min_ppm)
                    {
                        found_match = true;
                        matched_gps.Add(g);
                    }
                }
                if (g.GP_Mass > mass + 0.5)
                    break;

            }

            return found_match;
        }*/
        /// <summary>
        ///  Loads fragmentation (MSn) spectra for glycopeptide ions into the map and scores them.
        ///  For every MultiAlign record, the scan range of each associated UMC is walked; MSn scans
        ///  whose precursor mass agrees with the record mass are scored as CID, HCD and/or ETD
        ///  (depending on the dataset flags) and collected into fragmentation events on the UMC.
        ///  Records for which no HCD event was stored are removed from the map at the end.
        /// </summary>
        /// <param name="_glycoMap"> The Map where stuff gets loaded into</param>
        /// <param name="_parameters">Run settings. This method reads ProcessOnlyNGlycopeptides,
        /// MaxUMCCoverage, UseGlycoPeptideFile, UseDecoyGlycan, UseDecoyPeptide and CreateGlycoPeptideFile.</param>
        /// <param name="glycanListFile">The glycan file</param>
        /// <param name="fastaFile">The FASTA file</param>
        /// <param name="gpFile">A file for glycopeptides</param>
        public void LoadFragmentationSpectraIntoMap(ref Classes.MapRecord _glycoMap, ref Classes.Params _parameters, string glycanListFile, string fastaFile, string gpFile )
        {
            // ref parameters cannot be captured by the delegate below, so everything the
            // per-record code needs is copied into locals first.
            List<string> DatasetNames = _glycoMap._AssociatedDatasetNames; // only referenced by the disabled raw-file open below
            List <bool> isCID = _glycoMap._IsCID;
            List <bool> isHCD = _glycoMap._IsHCD;
            List <bool> isETD = _glycoMap._IsETD;

            bool filter_glycopeptides = _parameters.ProcessOnlyNGlycopeptides;
            double max_coverage = _parameters.MaxUMCCoverage;

            GlypID.Sequence.clsSequence[] sequences = new GlypID.Sequence.clsSequence[1];
            GlypID.Glycan.clsGlycan[] glycans = new GlypID.Glycan.clsGlycan[1];
            GlypID.Glycopeptide.clsGlycopeptide GlycoPeptide = new GlypID.Glycopeptide.clsGlycopeptide();
            Classes.MapRecord _tMap = new MapRecord(); // collects the records that are kept

            // -- Get Glycopeptide List --//
            if (_parameters.ProcessOnlyNGlycopeptides)
            {
                if (!_parameters.UseGlycoPeptideFile)
                {
                    Console.WriteLine("Reading Glycan List");
                    try
                    {
                        GlycoPeptide.LoadGlycansFromList(glycanListFile, ref glycans, _parameters.UseDecoyGlycan);
                    }
                    catch (Exception e)
                    {
                        // Without a glycan list nothing can be matched: report and give up.
                        System.Console.WriteLine(e.Message);
                        return;
                    }
                    Console.WriteLine("Reading Fasta File");
                    GlycoPeptide.LoadNGlycopeptidesFromFasta(fastaFile, ref sequences, _parameters.UseDecoyPeptide);
                    Console.WriteLine("Setting Glycopeptides");
                    SetGlycoPeptides(ref sequences, ref glycans);  //slightly inflated

                    if (_parameters.CreateGlycoPeptideFile)
                    {
                        // Derive the output name from the FASTA path minus its extension.
                        // A path without any '.' used to make Substring throw; it is now used whole.
                        int last_pos2 = fastaFile.LastIndexOf('.');
                        string fasta_stem = (last_pos2 >= 0) ? fastaFile.Substring(0, last_pos2) : fastaFile;
                        string outputgpfile = fasta_stem + "_GPInfo_v2.csv";
                        Console.WriteLine("Writing to GP File");
                        WriteOutGlycopeptidesToFile(outputgpfile);
                    }
                    Console.WriteLine("Creating Hash");
                    CreateHashTableGlycopeptides(_glycoMap.MapMinMass - 1, _glycoMap.MapMaxMass + 1, true);
                }
                else
                {
                    Console.WriteLine("Reading in from GP file");
                    LoadGlycopeptidesFromFile(gpFile , _glycoMap.MapMinMass - 1, _glycoMap.MapMaxMass + 1, true );
                    Console.WriteLine("Creating Hash") ;
                    // No explicit CreateHashTableGlycopeptides call on this path (it was disabled).
                }
            }

            Console.WriteLine("Loading Frag events and scoring");
            _glycopeptides.Clear();

            _glycoMap._AllMLNRecords.ForEach(delegate(MultiAlignRecord m)
            {
                bool process_mrecord = false;
                Console.WriteLine("UMC = " + m.ID) ;

                // Only load in those ions that match the glycopeptides list
                if (filter_glycopeptides)
                {
                    List<GlycopeptideRecord> candidate_gps = new List<GlycopeptideRecord>();
                    process_mrecord =  SearchGlycoPeptidesDictionary(m.Mass, ref candidate_gps, false);

                    if (process_mrecord)
                    {
                        // Keep the matching candidates on the record for ETD scoring further down.
                        m._CandidateGlycopeptideRecords = new GlycopeptideRecord[candidate_gps.Count];
                        for (int i = 0; i < candidate_gps.Count; i++)
                        {
                            m._CandidateGlycopeptideRecords[i] = candidate_gps[i];
                        }
                    }

                }
                else
                {
                    process_mrecord = true;
                }

                if (process_mrecord)
                {
                    bool store_record = false ; // This determines if the record needs to be stored
                    for (int i = 0; i < m._AssociatedUMCRecords.Count; i++)
                    {
                        // -- Start processing each umc -- //
                        UMCRecord u = m._AssociatedUMCRecords[i];
                        if (u.ScanRep == 0)
                            continue;

                        int datasetid = u.DatasetID;
                        // NOTE(review): the per-dataset raw-file open is disabled, so _RawData must
                        // already be opened elsewhere before this method runs -- confirm.
                        // _RawData = new GlypID.Readers.clsRawData(DatasetNames[datasetid], GlypID.Readers.FileType.FINNIGAN);

                        // Skip UMCs that span too large a fraction of the run.
                        double scanrange = Convert.ToDouble(u.ScanEnd - u.ScanStart);
                        int numscans = _RawData.GetNumScans();
                        double coverage = scanrange / numscans;
                        if (coverage >= max_coverage)
                        {
                            continue;
                        }

                        // umc has passed filtering conditions so start processing each scan in umc
                        double min_ppm = 50;

                        // Process each MSn scan from start to stop of UMC
                        List<double> parents_observed = new List<double>();
                        List<FragEvents>allobserved_frag_events = new List<FragEvents>() ;
                        Classes.FragEvents e = new FragEvents();

                        double most_recent_precursor = 0;
                        int frag_id = 0;
                        for (int scan = u.ScanStart ; scan <= u.ScanEnd ; scan++)
                        {
                            if (_RawData.IsMSScan(scan))
                                continue;

                            // Start processing MSn scan
                            bool process_scan = false;
                            bool record_scan = false ;

                            // get parent
                            double parent_mz = _RawData.GetParentMz(scan);
                            short header_cs = (short) _RawData.GetParentChargeFromHeader(scan);

                            if (header_cs > 0)
                            {
                                double mass = _utils.CalculateMass(parent_mz, header_cs);
                                if (_utils.CalculateDelMassDa(mass, m.Mass) < _DA_Diff)
                                    process_scan = true;
                            }
                            else
                            {
                                // No charge in the header: try default charge states 2 through 7.
                                // (Previously the masses for charges 4-7 were computed but the
                                // comparisons re-tested the charge 2 and 3 masses, so those charge
                                // states were never actually checked.)
                                for (short trial_cs = 2; trial_cs <= 7 && !process_scan; trial_cs++)
                                {
                                    double trial_mass = _utils.CalculateMass(parent_mz, trial_cs);
                                    if (_utils.CalculateDelMassDa(trial_mass, m.Mass) < _DA_Diff)
                                        process_scan = true;
                                }
                            }
                            if (!process_scan)
                                continue;

                            // Scan has passed ALL filters
                            float[] msms_mzs = new float[1];
                            float[] msms_intensities = new float[1];
                            _RawData.GetRawData(scan, ref msms_mzs, ref msms_intensities);
                            if (_RawData.IsProfileScan(scan))
                                _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.PROFILE;
                            else
                                _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;

                            if (most_recent_precursor != parent_mz)
                            {
                                // Only deisotope if parent has not been observed

                                // Store the previous Fragmentation event
                                if (e.HCDScore < 1)
                                {
                                    allobserved_frag_events.Add(e);
                                }

                                // Start a brand-new event for this precursor
                                e = new FragEvents();
                                e.ID = frag_id;
                                frag_id++;

                                // Start over
                                parents_observed.Add(parent_mz);
                                most_recent_precursor = parent_mz;
                                float[] parent_mzs = new float[1];
                                float[] parent_intensities = new float[1];
                                Reset();

                                // ** ---- deisotope the precursor -- ** //
                                int parent_scan = _RawData.GetParentScan(scan);
                                double parent_scan_time = _RawData.GetScanTime(parent_scan);  //TODO
                                _RawData.GetRawData(parent_scan, ref parent_mzs, ref parent_intensities);

                                // Do peak finding
                                if (_RawData.IsProfileScan(parent_scan))
                                    _ParentPeakProcessor.ProfileType = GlypID.enmProfileType.PROFILE;
                                else
                                    _ParentPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;
                                double thresh = GlypID.Utils.GetAverage(ref parent_intensities, float.MaxValue);
                                double background_intensity = GlypID.Utils.GetAverage(ref parent_intensities, (float)(5 * thresh));
                                _ParentPeakProcessor.SetPeakIntensityThreshold(background_intensity * _ParentPeakProcessorParams.PeakBackgroundRatio);
                                _ParentPeakProcessor.DiscoverPeaks(ref parent_mzs, ref parent_intensities, ref _ParentPeaks,
                                    Convert.ToSingle(_TransformParameters.MinMZ), Convert.ToSingle(_TransformParameters.MaxMZ), true);

                                // Pick out precursor and transform
                                double pep_intensity;
                                if (_TransformParameters.UseAbsolutePeptideIntensity)
                                    pep_intensity = _TransformParameters.AbsolutePeptideIntensity;
                                else
                                    pep_intensity = background_intensity * _TransformParameters.PeptideMinBackgroundRatio;
                                bool found = false;
                                if (_RawData.IsFTScan(parent_scan))
                                {
                                    found = _HornTransform.FindPrecursorTransform(Convert.ToSingle(background_intensity), Convert.ToSingle(pep_intensity), ref parent_mzs, ref parent_intensities,
                                        ref _ParentPeaks, Convert.ToSingle(parent_mz), ref _ParentTransformResults);
                                }
                                if (!found)
                                {
                                    // Low resolution data or bad high res spectra
                                    if (header_cs > 0)
                                    {
                                        // Fall back on the header's mono m/z (or the parent m/z when
                                        // the header has none) together with the header charge.
                                        double mono_mz = _RawData.GetParentMonoMzFromHeader(scan);
                                        if (mono_mz == 0)
                                            mono_mz = parent_mz;

                                        short[] charges = new short[1];
                                        charges[0] = header_cs;
                                        _HornTransform.AllocateValuesToTransform(Convert.ToSingle(mono_mz), 0, ref charges, ref _ParentTransformResults);
                                        found = true;
                                        record_scan = true;
                                        e.TransformResult = _ParentTransformResults[0];
                                        e.ParentMz = parent_mz;
                                        e.ParentScan = parent_scan;
                                        e.ParentScanTime = parent_scan_time;
                                    }
                                    // Scans with no header charge are not recorded (the charge 2/3
                                    // fallback that used to live here was removed by the original author).
                                }
                                else
                                {
                                    // Deisotoping succeeded: accept only if within min_ppm of the record mass.
                                    double ppm = _utils.CalculateDelMassPPM(m.Mass, _ParentTransformResults[0].mdbl_mono_mw);
                                    if ((ppm <= min_ppm) || (Math.Abs(ppm - min_ppm) < 1E-05))
                                    {
                                        record_scan = true;
                                        e.TransformResult = _ParentTransformResults[0];
                                        e.ParentMz = parent_mz;
                                        e.ParentScan = parent_scan;
                                        e.ParentScanTime = parent_scan_time;
                                    }
                                }
                            }
                            else
                            {
                                if (e.TransformResult.mdbl_mono_mw > 0)
                                {
                                    // Has been deisotoped successfully
                                    record_scan = true ;
                                }
                            }

                            //  -------- Process scan according to type   -------- //
                            if (isCID[datasetid] && _RawData.IsCIDScan(scan) && record_scan)
                            {
                                // CID scan
                                _MSMSPeakProcessor.SetPeakIntensityThreshold(0); // Since yin takes top 20 on his own.
                                _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks, Convert.ToSingle(_TransformParameters.MinMZ), Convert.ToSingle(_TransformParameters.MaxMZ), false);
                                if (_MSMSPeaks.Length > 0)
                                {
                                    //Scoring
                                    GlypID.CIDScoring.clsCIDScoringScanResults[] tCIDScoreResults = new GlypID.CIDScoring.clsCIDScoringScanResults[1];
                                    bool found_score = _CIDScoring.ScoreCIDSpectra(ref _MSMSPeaks, ref msms_mzs, ref msms_intensities, ref _ParentTransformResults, ref tCIDScoreResults);
                                    // Short-circuit so the result array is only read when scoring
                                    // actually reported a result (was the non-short-circuit '&').
                                    if (found_score && tCIDScoreResults[0].mdbl_cid_score > e.CIDScore)
                                    {

                                        e.CIDScan = scan;
                                        e.CIDMzs = msms_mzs;
                                        e.CIDIntensities = msms_intensities;
                                        e.CIDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                        e.CIDProfileType = _MSMSPeakProcessor.ProfileType;
                                        e.CIDScore = tCIDScoreResults[0].mdbl_cid_score;
                                        Array.Copy(_MSMSPeaks, e.CIDPeaks, _MSMSPeaks.Length);
                                    }
                                }
                            }

                            if (isHCD[datasetid] && _RawData.IsHCDScan(scan) && record_scan)
                            {
                                // HCD Scan
                                double hcd_background_intensity = GlypID.Utils.GetAverage(ref msms_intensities, ref msms_mzs, Convert.ToSingle(_ScoringParameters.MinHCDMz), Convert.ToSingle(_ScoringParameters.MaxHCDMz));
                                _MSMSPeakProcessor.SetPeakIntensityThreshold(hcd_background_intensity);
                                _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks, Convert.ToSingle(_ScoringParameters.MinHCDMz), Convert.ToSingle(_ScoringParameters.MaxHCDMz), false);
                                if (_MSMSPeaks.Length > 0)
                                {
                                    // Score
                                    GlypID.HCDScoring.clsHCDScoringScanResults[] tHCDScoreResults = new GlypID.HCDScoring.clsHCDScoringScanResults[1];
                                    double score = _HCDScoring.ScoreHCDSpectra(ref _MSMSPeaks, ref msms_mzs, ref msms_intensities, ref _ParentTransformResults, ref tHCDScoreResults);
                                    if (score < e.HCDScore) // This makes sure that within frag events of the same parent, the lowest one is chosen
                                    {
                                        // Store
                                        store_record = true;

                                        e.HCDScan = scan;
                                        e.HCDMzs = msms_mzs;
                                        e.HCDIntensities = msms_intensities;
                                        e.HCDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                        e.HCDProfileType = _MSMSPeakProcessor.ProfileType;
                                        Array.Copy(_MSMSPeaks, e.HCDPeaks, _MSMSPeaks.Length);
                                        e.HCDScore = tHCDScoreResults[0].mdbl_hcd_score;
                                        e.GlycanType = (GlypID.enmGlycanType) tHCDScoreResults[0].menm_glycan_type ;

                                    }
                                }
                            }

                            if (isETD[datasetid] && _RawData.IsETDScan(scan) && record_scan)
                            {

                                // ETD Scan
                                _MSMSPeakProcessor.SetPeakIntensityThreshold(0);
                                _MSMSPeakProcessor.ProfileType = GlypID.enmProfileType.CENTROIDED;
                                _MSMSPeakProcessor.DiscoverPeaks(ref msms_mzs, ref msms_intensities, ref _MSMSPeaks,
                                    0, Convert.ToSingle(parent_mz), false);

                                if ((_MSMSPeaks.Length > 1) && (e.HCDScore <1))
                                {
                                    // Score
                                    int best_scoring_index = -1;
                                    double max_score = 0;

                                    // Candidates are only assigned above when glycopeptide filtering is
                                    // on and a match was found; treat a missing array as "no candidates".
                                    int num_candidates = (m._CandidateGlycopeptideRecords == null) ? 0 : m._CandidateGlycopeptideRecords.Length;

                                    for (int k = 0; k < num_candidates; k++)
                                    {
                                        // Confirm glycan type matches with HCD prediction
                                        bool process_result = false;
                                        if ((e.GlycanType == GlypID.enmGlycanType.CS) || e.GlycanType == GlypID.enmGlycanType.HY)
                                        {
                                            if (m._CandidateGlycopeptideRecords[k].Glycan.numNeuAc >0)
                                                process_result = true;
                                            else
                                                process_result = false;
                                        }
                                        else
                                            process_result = true;

                                        // Type conversion: copy just the fields the ETD scorer is given
                                        GlypID.ETDScoring.clsETDScoringScanResults thisResult = new GlypID.ETDScoring.clsETDScoringScanResults();
                                        thisResult.mdbl_parent_mz = e.ParentMz;
                                        thisResult.mdbl_mono_mw = e.TransformResult.mdbl_mono_mw;
                                        thisResult.mshort_cs = e.TransformResult.mshort_cs;
                                        thisResult.mstr_glyco_site = m._CandidateGlycopeptideRecords[k].Sequence.nGlycoSite;
                                        thisResult.mstr_nglyco_site = m._CandidateGlycopeptideRecords[k].Sequence.nGlycoSite;
                                        thisResult.mstr_pep_seq = m._CandidateGlycopeptideRecords[k].Sequence.sequence;
                                        thisResult.mdbl_glycan_mass = m._CandidateGlycopeptideRecords[k].GlycanMonoMass;

                                        if (thisResult.mstr_pep_seq != "" && process_result)
                                        {
                                            double etd_score = _ETDScoring.ScoreETDSpectra(ref _MSMSPeaks, thisResult);
                                            if (etd_score >= max_score)
                                            {
                                                thisResult.mdbl_etd_score = etd_score;
                                                best_scoring_index = k;
                                                max_score = etd_score;
                                            }
                                        }
                                    }

                                    // Store
                                    if ((best_scoring_index != -1) && (max_score > e.ETDScore))
                                    {
                                        e.ETDScan = scan;
                                        e.ETDMzs = msms_mzs;
                                        e.ETDIntensities = msms_intensities;
                                        e.ETDPeaks = new GlypID.Peaks.clsPeak[_MSMSPeaks.Length];
                                        Array.Copy(_MSMSPeaks, e.ETDPeaks, _MSMSPeaks.Length);

                                        e.ETDScore = max_score;
                                        e.GP_Record = m._CandidateGlycopeptideRecords[best_scoring_index];
                                        e.FalseHit = e.GP_Record.IsDecoy;
                                    }
                                }
                            }
                        }
                        // Store the most recent fragmentation event
                        if (e.HCDScore < 1)
                        {
                            allobserved_frag_events.Add(e);
                        }

                        // Replace the UMC's fragmentation events with the ones observed in this pass.
                        m._AssociatedUMCRecords[i]._AssociatedFragEvents.Clear();
                        for (int kk = 0; kk < allobserved_frag_events.Count; kk++)
                        {
                            FragEvents tempE = allobserved_frag_events[kk] ;
                            m._AssociatedUMCRecords[i]._AssociatedFragEvents.Add(tempE);
                        }
                        m._AssociatedUMCRecords[i].SetRepScores();
                    }
                    if (store_record)
                        _tMap.AddRecord(m);

                }

            });

            // If any record was dropped, rebuild the caller's map from the kept records.
            if (_tMap._AllMLNRecords.Count != _glycoMap._AllMLNRecords.Count)
            {
                _glycoMap.ClearRecords();
                for (int k = 0; k < _tMap._AllMLNRecords.Count; k++)
                {
                    _glycoMap.AddRecord(_tMap._AllMLNRecords[k]);
                }
            }

            _tMap.ClearRecords();
        }
Example #6
0
        /// <summary>
        /// Runs glycan sequencing on the CID spectrum of every cluster-representative fragmentation event
        /// that has a positive ETD score, and records the top-ranked structure on that event.
        /// </summary>
        /// <remarks>
        /// For each qualifying event, candidate Y1 ions (peptide mass plus one HexNAc) are tried at charge
        /// states from (precursor charge - 1) down to 1. The first charge state for which sequencing reports
        /// at least one structure is used: its top structure's score is written to
        /// <c>CIDSequencingScore</c>, its IUPAC string to <c>GP_Record.GlycanSequence</c>, and the returned
        /// structures are written to <c>&lt;sequencingFolder&gt;\&lt;record ID&gt;_&lt;CID scan&gt;.txt</c>.
        /// </remarks>
        /// <param name="_glycoMap">Map whose records' <c>_ClusterRepFragEvents</c> are examined.</param>
        /// <param name="_params">Not referenced by the current implementation.</param>
        /// <param name="sequencingFolder">
        /// Folder handed to the sequencer and used for the per-scan output files. When null or empty,
        /// <c>c:\sequencing\</c> is used.
        /// </param>
        public void SequenceCIDPeaks(ref Classes.MapRecord _glycoMap, ref Classes.Params _params, string sequencingFolder)
        {
            // Treat null the same as "" so a missing folder falls back to the default instead of
            // silently producing file names with no directory part.
            if (string.IsNullOrEmpty(sequencingFolder))
                sequencingFolder = @"c:\sequencing\";

            for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
            {
                MultiAlignRecord m = _glycoMap._AllMLNRecords[i];

                for (int k = 0; k < m._ClusterRepFragEvents.Count; k++)
                {
                    FragEvents e = m._ClusterRepFragEvents[k];

                    // Only events with a positive ETD score are sequenced.
                    if (!(e.ETDScore > 0))
                        continue;

                    COL.MassLib.MSScan _msScan = new COL.MassLib.MSScan(e.CIDMzs, e.CIDIntensities, Convert.ToSingle(e.TransformResult.mdbl_mz),
                        Convert.ToSingle(e.TransformResult.mdbl_mono_mw), Convert.ToSingle(e.TransformResult.mdbl_average_mw), Convert.ToInt32(e.TransformResult.mshort_cs));

                    // A positive average mass means proper deisotoping has occurred, so average composition
                    // is used; otherwise only the charge state was assigned and both Y1 and the precursor
                    // are treated as monoisotopic.
                    bool useAverageMass = e.TransformResult.mdbl_average_mw > 0;

                    float PeptideMass;
                    float GlcNAcMass;
                    if (useAverageMass)
                    {
                        PeptideMass = Convert.ToSingle(e.GP_Record.SequenceAverageMass);
                        GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanAVGMass(COL.GlycoLib.Glycan.Type.HexNAc);
                    }
                    else
                    {
                        PeptideMass = Convert.ToSingle(e.GP_Record.SequenceMonoMass);
                        GlcNAcMass = COL.GlycoLib.GlycanMass.GetGlycanMass(COL.GlycoLib.Glycan.Type.HexNAc);
                    }

                    // Try Y1 charge states from (precursor charge - 1) downwards; stop at the first that sequences.
                    short y1cs = e.TransformResult.mshort_cs;
                    for (y1cs--; y1cs > 0; y1cs--)
                    {
                        float y1Mz = ((PeptideMass + GlcNAcMass) + (float)_utils._CC_MASS * y1cs) / y1cs;

                        // NOTE(review): the literal arguments (0, true, 0.8f, 60) and the settings below are
                        // carried over unchanged; their meaning is defined by COL.GlycoSequence.GlycanSequencing.
                        COL.GlycoSequence.GlycanSequencing _Gs = new COL.GlycoSequence.GlycanSequencing(_msScan, y1Mz, y1cs, e.GP_Record.Glycan.numHex,
                            e.GP_Record.Glycan.numHexNAc, e.GP_Record.Glycan.numDeHex, e.GP_Record.Glycan.numNeuAc, 0, sequencingFolder, true, 0.8f, 60);

                        _Gs.NumbersOfPeaksForSequencing = 140;
                        _Gs.CreatePrecursotMZ = true;
                        _Gs.RewardForCompleteStructure = 3;
                        _Gs.UseAVGMass = useAverageMass;

                        int structure_count = _Gs.StartSequencing();
                        if (structure_count <= 0)
                            continue;

                        List<COL.GlycoLib.GlycanStructure> topstructures = _Gs.GetTopRankScoreStructre(1);
                        e.CIDSequencingScore = topstructures[0].Score;
                        e.GP_Record.GlycanSequence = topstructures[0].IUPACString;

                        // Write the sequences out. Path.Combine inserts the directory separator when the
                        // caller's folder does not end with one.
                        string opfile = System.IO.Path.Combine(sequencingFolder, m.ID.ToString() + "_" + e.CIDScan.ToString() + ".txt");
                        Utils.CSVFileHandler cidString = new CSVFileHandler(opfile, CSVFileHandler.WRITE_ONLY);
                        cidString.openFile();
                        try
                        {
                            for (int zz = 0; zz < topstructures.Count; zz++)
                            {
                                cidString.writeLine(topstructures[zz].IUPACString);
                            }
                        }
                        finally
                        {
                            // Close the file even if a write fails.
                            cidString.closeFile();
                        }
                        break;
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Assigns identifications to every record of a map that contains at least one ETD dataset.
        /// </summary>
        /// <remarks>
        /// Per record:
        /// <list type="bullet">
        /// <item>no cluster-representative fragmentation events: IDs are set from mass and glycan type;</item>
        /// <item>representatives with uniform IDs across clusters: IDs are taken from the highest ETD score,
        /// falling back to mass and glycan type when the record is still labelled <c>Unverified</c>;</item>
        /// <item>representatives with conflicting IDs: the record is left untouched.</item>
        /// </list>
        /// When no dataset is flagged as ETD the method does nothing.
        /// </remarks>
        /// <param name="_glycoMap">Map whose records are updated.</param>
        /// <param name="_params">Supplies <c>ScoringParams.PPMTolerance</c> for the mass-based assignment.</param>
        public void ResolveIdentificationsAcrossClusters(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
        {
            // The ETD flag list does not change while records are processed, so test it once.
            // Maps without any ETD dataset have no resolution logic yet.
            if (_glycoMap._IsETD == null || !_glycoMap._IsETD.Contains(true))
                return;

            for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
            {
                MultiAlignRecord m = _glycoMap._AllMLNRecords[i];

                if (m._ClusterRepFragEvents.Count == 0)
                {
                    // NOTE(review): the meaning of the boolean flag is defined by
                    // MultiAlignRecord.SetIdsBasedOnMassAndGlycanType; values kept as in the original.
                    m.SetIdsBasedOnMassAndGlycanType(_params.ScoringParams.PPMTolerance, false);
                    continue;
                }

                if (!m.HasUniformIDsAcrossClusters())
                {
                    // TODO: clusters disagree on the identification; such records are currently left unresolved.
                    continue;
                }

                m.SetIdsBasedOnHighestETD();
                if (m.IDLabel == _ID_label.Unverified)
                    m.SetIdsBasedOnMassAndGlycanType(_params.ScoringParams.PPMTolerance, true);
            }
        }
Example #8
0
        /// <summary>
        /// Picks one representative fragmentation event per CID cluster of every record and appends it
        /// to the record's <c>_ClusterRepFragEvents</c>.
        /// </summary>
        /// <remarks>
        /// Each cluster name is a '-'-separated list of spectrum names whose '_'-separated fields carry the
        /// UMC dataset ID (field 1) and the fragmentation event ID (field 2). When the map contains an ETD
        /// dataset, the representative is the event with the highest ETD score among the UMCs in the
        /// cluster; otherwise, unless only N-glycopeptides are processed, it is taken from
        /// <c>FragEventWithLowestHCDScore</c>. A representative is stored when its ETD score is positive or
        /// when <c>ProcessOnlyNGlycopeptides</c> is false.
        /// </remarks>
        /// <param name="_glycoMap">Map whose records receive the representatives.</param>
        /// <param name="_params">Supplies <c>ProcessOnlyNGlycopeptides</c>.</param>
        public void GetRepresentatives(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
        {
            bool use_etd = _glycoMap._IsETD.Contains(true);

            for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
            {
                MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
                int num_clusters = m._ClusterNames.Count;

                for (int c = 0; c < num_clusters; c++)
                {
                    string[] spectra = m._ClusterNames[c].Split('-');

                    // Fragmentation event IDs of this cluster, grouped by UMC dataset ID.
                    // Every dataset ID gets its OWN list; sharing one list between keys would make
                    // later IDs overwrite the entries collected for earlier ones.
                    Dictionary<int, List<int>> umcs_frag_ids = new Dictionary<int, List<int>>();
                    foreach (string s in spectra)
                    {
                        string[] parts = s.Split('_');
                        int umc_id = Convert.ToInt32(parts[1]);
                        int frag_id = Convert.ToInt32(parts[2]);

                        List<int> frag_ids;
                        if (!umcs_frag_ids.TryGetValue(umc_id, out frag_ids))
                        {
                            frag_ids = new List<int>();
                            umcs_frag_ids.Add(umc_id, frag_ids);
                        }
                        frag_ids.Add(frag_id);
                    }

                    double max_etd_score = 0;
                    FragEvents maxFragEvent = new FragEvents();
                    for (int j = 0; j < m._AssociatedUMCRecords.Count; j++)
                    {
                        List<int> frags_in_cluster;
                        if (!umcs_frag_ids.TryGetValue(m._AssociatedUMCRecords[j].DatasetID, out frags_in_cluster))
                            continue;

                        if (use_etd)
                        {
                            FragEvents candidate = m._AssociatedUMCRecords[j].FragEventWithHighestETDScore(true, frags_in_cluster);
                            if (candidate.ETDScore > max_etd_score)
                            {
                                // Track the running maximum so the best-scoring UMC wins,
                                // not merely the last one with a positive score.
                                max_etd_score = candidate.ETDScore;
                                maxFragEvent = candidate;
                            }
                        }
                        else if (!_params.ProcessOnlyNGlycopeptides)
                        {
                            // NOTE(review): the last matching UMC wins here; scores are not compared
                            // across UMCs. Confirm whether the lowest HCD score overall was intended.
                            maxFragEvent = m._AssociatedUMCRecords[j].FragEventWithLowestHCDScore(frags_in_cluster);
                        }
                    }

                    if ((maxFragEvent.ETDScore > 0) || (!_params.ProcessOnlyNGlycopeptides))
                        m._ClusterRepFragEvents.Add(maxFragEvent);
                }
            }
        }
Example #9
0
        /// <summary>
        /// Clusters the CID spectra of each record, one precursor charge state at a time, and appends
        /// the resulting cluster names to the record's <c>_ClusterNames</c>.
        /// </summary>
        /// <remarks>
        /// For every charge state between the record's minimum and maximum observed charge, the CID peak
        /// lists of all matching fragmentation events are handed to the shared spectral-clustering helper
        /// under the name <c>Spectra_&lt;DatasetID&gt;_&lt;event ID&gt;_&lt;running index&gt;_&lt;CID scan&gt;</c>.
        /// Fragmentation events without CID peaks are skipped.
        /// </remarks>
        /// <param name="_glycoMap">Map whose records are clustered; its record list is rebuilt at the end.</param>
        /// <param name="_params">Not referenced by the current implementation.</param>
        public void ClusterRecordsOnCID(ref Classes.MapRecord _glycoMap, ref Classes.Params _params)
        {
            Classes.MapRecord _tempMap = new MapRecord();
            _tempMap._AssociatedDatasetNames = _glycoMap._AssociatedDatasetNames;
            _tempMap._IsCID = _glycoMap._IsCID;
            _tempMap._IsETD = _glycoMap._IsETD;
            _tempMap._IsHCD = _glycoMap._IsHCD;

            for (int i = 0; i < _glycoMap._AllMLNRecords.Count; i++)
            {
                MultiAlignRecord m = _glycoMap._AllMLNRecords[i];
                int num_records = m._AssociatedUMCRecords.Count;

                short mincs = m.MinChargeStateObserved();
                short maxcs = m.MaxChargeStateObserved();

                for (short thiscs = mincs; thiscs <= maxcs; thiscs++)
                {
                    int spectra_num = 0;
                    List<string> cluster_names = new List<string>();

                    try
                    {
                        for (int j = 0; j < num_records; j++)
                        {
                            var umc = m._AssociatedUMCRecords[j];
                            for (int k = 0; k < umc._AssociatedFragEvents.Count; k++)
                            {
                                FragEvents frag = umc._AssociatedFragEvents[k];
                                GlypID.Peaks.clsPeak[] cidPeaks = frag.CIDPeaks;

                                // Skip events that carry no CID spectrum; guarding null/empty avoids
                                // dereferencing element 0 of a missing peak array.
                                if (cidPeaks == null || cidPeaks.Length == 0 || cidPeaks[0] == null)
                                    continue;
                                if (frag.TransformResult.mshort_cs != thiscs)
                                    continue;

                                string spectra_name = "Spectra_" + umc.DatasetID + "_" + frag.ID + "_" +
                                    spectra_num + "_" + frag.CIDScan;

                                // Hand the helper its own copy of the peak array.
                                GlypID.Peaks.clsPeak[] thisCIDPeaks = new GlypID.Peaks.clsPeak[cidPeaks.Length];
                                Array.Copy(cidPeaks, thisCIDPeaks, cidPeaks.Length);
                                _CIDSpectralUtilities.AddPeaksToList(ref thisCIDPeaks, spectra_name);
                                spectra_num++;
                            }
                        }

                        // More than one cluster for a single charge state indicates glycoforms.
                        int num_clusters = _CIDSpectralUtilities.ClusterSpectraInList();
                        _CIDSpectralUtilities.GetClusterNames(ref cluster_names);

                        for (int c = 0; c < num_clusters; c++)
                        {
                            m._ClusterNames.Add(cluster_names[c]);
                        }
                    }
                    finally
                    {
                        // The helper is shared; always reset it so spectra from this charge state
                        // cannot leak into the next clustering run.
                        _CIDSpectralUtilities.Clear();
                    }
                }
                _tempMap.AddRecord(m);
            }

            // Restore the records into the caller's map, in their original order.
            _glycoMap.ClearRecords();
            for (int i = 0; i < _tempMap._AllMLNRecords.Count; i++)
            {
                _glycoMap.AddRecord(_tempMap._AllMLNRecords[i]);
            }
        }