Esempio n. 1
0
        // Lazily enumerates every selection of combination_length entries from all_ptms and wraps each one in a PtmSet.
        // Selections are built from strictly increasing positions in all_ptms, so each set of positions is produced once.
        // modification_ranks and added_ptm_penalization are passed through to the PtmSet constructor unchanged.
        private static IEnumerable <PtmSet> combinations(List <Ptm> all_ptms, int combination_length, Dictionary <double, int> modification_ranks, int added_ptm_penalization)
        {
            // Scratch buffer holding the selection currently being assembled.
            Ptm[] chosen = new Ptm[combination_length];

            // Each stacked value is the next position in all_ptms still to be tried for one slot of the buffer;
            // the depth of the stack at the moment of a pop identifies which slot that is.
            Stack<int> pending = new Stack<int>();
            pending.Push(0);

            while (pending.Count != 0)
            {
                // The slot must be read before popping: it is derived from the stack depth.
                int slot = pending.Count - 1;

                for (int candidate = pending.Pop(); candidate < all_ptms.Count;)
                {
                    chosen[slot] = all_ptms[candidate];
                    slot += 1;
                    candidate += 1;

                    // Remember the alternative for this slot so it is revisited after the current branch is exhausted.
                    if (candidate < all_ptms.Count)
                    {
                        pending.Push(candidate);
                    }

                    if (slot != combination_length)
                    {
                        continue;
                    }

                    // The buffer is full: hand out a snapshot, because the buffer itself is reused for later selections.
                    yield return new PtmSet(new List<Ptm>(chosen), modification_ranks, added_ptm_penalization);
                    break;
                }
            }
        }
        /// <summary>
        /// Reads a MetaMorpheus results spreadsheet and converts its rows into top-down hits.
        /// </summary>
        /// <remarks>
        /// Rows are processed in parallel. A row is only considered when it has exactly 55 cells.
        /// A row is skipped, and a message is added to <c>bad_topdown_ptms</c>, when its full sequence cannot be
        /// interpreted or when one of its modifications is not present in the UniProt modification set of the
        /// theoretical database. A row is also skipped when its residue range cell is empty, or when the resulting hit
        /// does not have positive begin/end residues, masses, scores, scan number and retention time.
        /// </remarks>
        /// <param name="file">The input file whose sheet is read through <c>ExcelReader.get_cell_strings</c>.</param>
        /// <returns>The hits that passed every check; ordering is not deterministic because rows are handled in parallel.</returns>
        public List <TopDownHit> ReadMetamopheusFile(InputFile file)
        {
            // Positions of the columns this method reads. The full header of the sheet is:
            //  0 File Name, 1 Scan Number, 2 Scan Retention Time, 3 Num Experimental Peaks, 4 Total Ion Current,
            //  5 Precursor Scan Number, 6 Precursor Charge, 7 Precursor MZ, 8 Precursor Mass, 9 Score, 10 Delta Score,
            //  11 Notch, 12 Different Peak Matches, 13 Base Sequence, 14 Full Sequence, 15 Essential Sequence,
            //  16 PSM Count, 17 Mods, 18 Mods Chemical Formulas, 19 Mods Combined Chemical Formula,
            //  20 Num Variable Mods, 21 Missed Cleavages, 22 Peptide Monoisotopic Mass, 23 Mass Diff (Da),
            //  24 Mass Diff (ppm), 25 Protein Accession, 26 Protein Name, 27 Gene Name, 28 Organism Name,
            //  29 Intersecting Sequence Variations, 30 Identified Sequence Variations, 31 Splice Sites,
            //  32 Contaminant, 33 Decoy, 34 Peptide Description, 35 Start and End Residues In Protein,
            //  36 Previous Amino Acid, 37 Next Amino Acid, 38 All Scores, 39 Theoreticals Searched,
            //  40 Decoy/Contaminant/Target, 41 Matched Ion Series, 42 Matched Ion Mass-To-Charge Ratios,
            //  43 Matched Ion Mass Diff (Da), 44 Matched Ion Mass Diff (Ppm), 45 Matched Ion Intensities,
            //  46 Matched Ion Counts, 47 Localized Scores, 48 Improvement Possible, 49 Cumulative Target,
            //  50 Cumulative Decoy, 51 QValue, 52 Cumulative Target Notch, 53 Cumulative Decoy Notch,
            //  54 QValue Notch, 55 eValue, 56 eScore
            // NOTE(review): the header lists 57 columns (0-56), but only rows with exactly 55 cells are accepted
            // below, so columns 55 and 56 cannot be present on an accepted row -- confirm which layout is intended.
            const int expected_cell_count = 55;
            const int file_name_column = 0;
            const int scan_number_column = 1;
            const int retention_time_column = 2;
            const int precursor_mass_column = 8;
            const int base_sequence_column = 13;
            const int full_sequence_column = 14;
            const int peptide_monoisotopic_mass_column = 22;
            const int accession_column = 25;
            const int protein_name_column = 26;
            const int residue_range_column = 35;

            //if neucode labeled, calculate neucode light theoretical AND observed mass! --> better for matching up
            //if carbamidomethylated, add 57 to theoretical mass (already in observed mass...)
            aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
            List <TopDownHit> td_hits = new List <TopDownHit>();

            // Lookup used by PeptideWithSetModifications to resolve the modification names embedded in a full sequence.
            Dictionary <string, Modification> mods = Sweet.lollipop.theoretical_database.all_mods_with_mass.ToDictionary(kv => kv.IdWithMotif, kv => kv);

            // Ids of every UniProt modification, collected once so each row does a constant-time membership test
            // instead of re-scanning the whole modification collection for every modification it carries.
            HashSet <string> uniprot_mod_ids = new HashSet <string>(
                Sweet.lollipop.theoretical_database.uniprotModifications.Values.SelectMany(m => m).Select(m => m.IdWithMotif));

            //This returns the entire sheet except for the header. Each row of cells is one List<string>
            List <List <string> > cells = ExcelReader.get_cell_strings(file, true);

            Parallel.ForEach(cells, cellStrings =>
            {
                if (cellStrings.Count != expected_cell_count)
                {
                    return;
                }

                // Cleared when the sequence cannot be read or a modification is unknown; such rows produce no hit.
                bool add_topdown_hit = true;
                List <Ptm> new_ptm_list = new List <Ptm>();

                try
                {
                    // Only the first '|'-separated full sequence of the cell is used.
                    PeptideWithSetModifications modsIdentifier = new PeptideWithSetModifications(cellStrings[full_sequence_column].Split('|')[0], mods);

                    foreach (KeyValuePair <int, Proteomics.Modification> entry in modsIdentifier.AllModsOneIsNterminus)
                    {
                        if (uniprot_mod_ids.Contains(entry.Value.IdWithMotif))
                        {
                            new_ptm_list.Add(new Ptm(entry.Key, entry.Value));
                        }
                        else
                        {
                            // bad_topdown_ptms is shared between the parallel workers, so additions are serialized.
                            lock (bad_topdown_ptms)
                            {
                                bad_topdown_ptms.Add("Mod Name:" + entry.Value.IdWithMotif + " at " + entry.Key);
                            }
                            add_topdown_hit = false;
                        }
                    }
                }
                catch (MzLibUtil.MzLibException)
                {
                    // The full sequence itself could not be interpreted; report the file and scan it came from.
                    lock (bad_topdown_ptms)
                    {
                        bad_topdown_ptms.Add("Bad mod at " + cellStrings[file_name_column] + " scan " + cellStrings[scan_number_column]);
                    }
                    add_topdown_hit = false;
                }

                if (!add_topdown_hit || cellStrings[residue_range_column].Length == 0)
                {
                    return;
                }

                parse_metamorpheus_residue_range(cellStrings[residue_range_column], out int begin, out int end);

                // TryParse leaves 0 in the out variable on failure; the positivity check below then rejects the hit.
                // NOTE(review): these parses use the current culture -- confirm ExcelReader's number formatting.
                Double.TryParse(cellStrings[precursor_mass_column], out double precursor_mass);
                Double.TryParse(cellStrings[peptide_monoisotopic_mass_column], out double peptide_monoisotopic_mass);
                Int32.TryParse(cellStrings[scan_number_column], out int scan_number);
                Double.TryParse(cellStrings[retention_time_column], out double retention_time);

                // The file name is truncated at its first '.'; the precursor mass is deliberately passed in two
                // positions and the last argument is always min_score_td + 1, exactly as before.
                TopDownHit td_hit = new TopDownHit(aaIsotopeMassList, file, TopDownResultType.TightAbsoluteMass,
                                                   cellStrings[accession_column], cellStrings[full_sequence_column], cellStrings[accession_column],
                                                   cellStrings[protein_name_column], cellStrings[base_sequence_column],
                                                   begin, end, new_ptm_list, precursor_mass, peptide_monoisotopic_mass,
                                                   scan_number, retention_time, cellStrings[file_name_column].Split('.')[0],
                                                   precursor_mass, Sweet.lollipop.min_score_td + 1);

                bool is_complete = td_hit.begin > 0 && td_hit.end > 0 && td_hit.theoretical_mass > 0 && td_hit.pscore > 0 &&
                                   td_hit.reported_mass > 0 && td_hit.score > 0 &&
                                   td_hit.ms2ScanNumber > 0 && td_hit.ms2_retention_time > 0;
                if (is_complete)
                {
                    // td_hits is shared between the parallel workers.
                    lock (td_hits)
                    {
                        td_hits.Add(td_hit);
                    }
                }
            });

            return td_hits;
        }

        // Extracts the begin and end residue numbers from the first '|'-separated entry of a residue range cell.
        // The entry is split on spaces; the begin number is the text after '[' in the first token and the end number
        // is the text before ']' in the third token. Anything that is missing or not an integer is reported as 0
        // rather than thrown, so one malformed cell cannot abort the parallel read of the whole sheet.
        private static void parse_metamorpheus_residue_range(string residue_cell, out int begin, out int end)
        {
            begin = 0;
            end = 0;

            string[] tokens = residue_cell.Split('|')[0].Split(' ');
            if (tokens.Length < 3)
            {
                return;
            }

            string[] begin_parts = tokens[0].Split('[');
            if (begin_parts.Length < 2)
            {
                return;
            }

            // Split always returns at least one element, so index 0 is safe here.
            string end_text = tokens[2].Split(']')[0];

            Int32.TryParse(begin_parts[1], out begin);
            Int32.TryParse(end_text, out end);
        }